LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
zlaqr5.f
Go to the documentation of this file.
1*> \brief \b ZLAQR5 performs a single small-bulge multi-shift QR sweep.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download ZLAQR5 + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaqr5.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaqr5.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaqr5.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE ZLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
20* H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
21* WV, LDWV, NH, WH, LDWH )
22*
23* .. Scalar Arguments ..
24* INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
25* $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
26* LOGICAL WANTT, WANTZ
27* ..
28* .. Array Arguments ..
29* COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
30* $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
31* ..
32*
33*
34*> \par Purpose:
35* =============
36*>
37*> \verbatim
38*>
39*> ZLAQR5, called by ZLAQR0, performs a
40*> single small-bulge multi-shift QR sweep.
41*> \endverbatim
42*
43* Arguments:
44* ==========
45*
46*> \param[in] WANTT
47*> \verbatim
48*> WANTT is LOGICAL
49*> WANTT = .true. if the triangular Schur factor
50*> is being computed. WANTT is set to .false. otherwise.
51*> \endverbatim
52*>
53*> \param[in] WANTZ
54*> \verbatim
55*> WANTZ is LOGICAL
56*> WANTZ = .true. if the unitary Schur factor is being
57*> computed. WANTZ is set to .false. otherwise.
58*> \endverbatim
59*>
60*> \param[in] KACC22
61*> \verbatim
62*> KACC22 is INTEGER with value 0, 1, or 2.
63*> Specifies the computation mode of far-from-diagonal
64*> orthogonal updates.
65*> = 0: ZLAQR5 does not accumulate reflections and does not
66*> use matrix-matrix multiply to update far-from-diagonal
67*> matrix entries.
68*> = 1: ZLAQR5 accumulates reflections and uses matrix-matrix
69*> multiply to update the far-from-diagonal matrix entries.
70*> = 2: Same as KACC22 = 1. This option used to enable exploiting
71*> the 2-by-2 structure during matrix multiplications, but
72*> this is no longer supported.
73*> \endverbatim
74*>
75*> \param[in] N
76*> \verbatim
77*> N is INTEGER
78*> N is the order of the Hessenberg matrix H upon which this
79*> subroutine operates.
80*> \endverbatim
81*>
82*> \param[in] KTOP
83*> \verbatim
84*> KTOP is INTEGER
85*> \endverbatim
86*>
87*> \param[in] KBOT
88*> \verbatim
89*> KBOT is INTEGER
90*> These are the first and last rows and columns of an
91*> isolated diagonal block upon which the QR sweep is to be
92*> applied. It is assumed without a check that
93*> either KTOP = 1 or H(KTOP,KTOP-1) = 0
94*> and
95*> either KBOT = N or H(KBOT+1,KBOT) = 0.
96*> \endverbatim
97*>
98*> \param[in] NSHFTS
99*> \verbatim
100*> NSHFTS is INTEGER
101*> NSHFTS gives the number of simultaneous shifts. NSHFTS
102*> must be positive and even.
103*> \endverbatim
104*>
105*> \param[in,out] S
106*> \verbatim
107*> S is COMPLEX*16 array, dimension (NSHFTS)
108*> S contains the shifts of origin that define the multi-
109*> shift QR sweep. On output S may be reordered.
110*> \endverbatim
111*>
112*> \param[in,out] H
113*> \verbatim
114*> H is COMPLEX*16 array, dimension (LDH,N)
115*> On input H contains a Hessenberg matrix. On output a
116*> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
117*> to the isolated diagonal block in rows and columns KTOP
118*> through KBOT.
119*> \endverbatim
120*>
121*> \param[in] LDH
122*> \verbatim
123*> LDH is INTEGER
124*> LDH is the leading dimension of H just as declared in the
125*> calling procedure. LDH >= MAX(1,N).
126*> \endverbatim
127*>
128*> \param[in] ILOZ
129*> \verbatim
130*> ILOZ is INTEGER
131*> \endverbatim
132*>
133*> \param[in] IHIZ
134*> \verbatim
135*> IHIZ is INTEGER
136*> Specify the rows of Z to which transformations must be
137*> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
138*> \endverbatim
139*>
140*> \param[in,out] Z
141*> \verbatim
142*> Z is COMPLEX*16 array, dimension (LDZ,IHIZ)
143*> If WANTZ = .TRUE., then the QR Sweep unitary
144*> similarity transformation is accumulated into
145*> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
146*> If WANTZ = .FALSE., then Z is unreferenced.
147*> \endverbatim
148*>
149*> \param[in] LDZ
150*> \verbatim
151*> LDZ is INTEGER
152*> LDA is the leading dimension of Z just as declared in
153*> the calling procedure. LDZ >= N.
154*> \endverbatim
155*>
156*> \param[out] V
157*> \verbatim
158*> V is COMPLEX*16 array, dimension (LDV,NSHFTS/2)
159*> \endverbatim
160*>
161*> \param[in] LDV
162*> \verbatim
163*> LDV is INTEGER
164*> LDV is the leading dimension of V as declared in the
165*> calling procedure. LDV >= 3.
166*> \endverbatim
167*>
168*> \param[out] U
169*> \verbatim
170*> U is COMPLEX*16 array, dimension (LDU,2*NSHFTS)
171*> \endverbatim
172*>
173*> \param[in] LDU
174*> \verbatim
175*> LDU is INTEGER
176*> LDU is the leading dimension of U just as declared in the
177*> in the calling subroutine. LDU >= 2*NSHFTS.
178*> \endverbatim
179*>
180*> \param[in] NV
181*> \verbatim
182*> NV is INTEGER
183*> NV is the number of rows in WV agailable for workspace.
184*> NV >= 1.
185*> \endverbatim
186*>
187*> \param[out] WV
188*> \verbatim
189*> WV is COMPLEX*16 array, dimension (LDWV,2*NSHFTS)
190*> \endverbatim
191*>
192*> \param[in] LDWV
193*> \verbatim
194*> LDWV is INTEGER
195*> LDWV is the leading dimension of WV as declared in the
196*> in the calling subroutine. LDWV >= NV.
197*> \endverbatim
198*
199*> \param[in] NH
200*> \verbatim
201*> NH is INTEGER
202*> NH is the number of columns in array WH available for
203*> workspace. NH >= 1.
204*> \endverbatim
205*>
206*> \param[out] WH
207*> \verbatim
208*> WH is COMPLEX*16 array, dimension (LDWH,NH)
209*> \endverbatim
210*>
211*> \param[in] LDWH
212*> \verbatim
213*> LDWH is INTEGER
214*> Leading dimension of WH just as declared in the
215*> calling procedure. LDWH >= 2*NSHFTS.
216*> \endverbatim
217*>
218* Authors:
219* ========
220*
221*> \author Univ. of Tennessee
222*> \author Univ. of California Berkeley
223*> \author Univ. of Colorado Denver
224*> \author NAG Ltd.
225*
226*> \ingroup laqr5
227*
228*> \par Contributors:
229* ==================
230*>
231*> Karen Braman and Ralph Byers, Department of Mathematics,
232*> University of Kansas, USA
233*>
234*> Lars Karlsson, Daniel Kressner, and Bruno Lang
235*>
236*> Thijs Steel, Department of Computer science,
237*> KU Leuven, Belgium
238*
239*> \par References:
240* ================
241*>
242*> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
243*> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
244*> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
245*> 929--947, 2002.
246*>
247*> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
248*> chains of bulges in multishift QR algorithms.
249*> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
250*>
251* =====================================================================
252 SUBROUTINE zlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
253 $ S,
254 $ H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
255 $ WV, LDWV, NH, WH, LDWH )
256 IMPLICIT NONE
257*
258* -- LAPACK auxiliary routine --
259* -- LAPACK is a software package provided by Univ. of Tennessee, --
260* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
261*
262* .. Scalar Arguments ..
263 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
264 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
265 LOGICAL WANTT, WANTZ
266* ..
267* .. Array Arguments ..
268 COMPLEX*16 H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
269 $ WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
270* ..
271*
272* ================================================================
273* .. Parameters ..
274 COMPLEX*16 ZERO, ONE
275 PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ),
276 $ one = ( 1.0d0, 0.0d0 ) )
277 DOUBLE PRECISION RZERO, RONE
278 parameter( rzero = 0.0d0, rone = 1.0d0 )
279* ..
280* .. Local Scalars ..
281 COMPLEX*16 ALPHA, BETA, CDUM, REFSUM, T1, T2, T3
282 DOUBLE PRECISION H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
283 $ SMLNUM, TST1, TST2, ULP
284 INTEGER I2, I4, INCOL, J, JBOT, JCOL, JLEN,
285 $ jrow, jtop, k, k1, kdu, kms, krcol,
286 $ m, m22, mbot, mtop, nbmps, ndcol,
287 $ ns, nu
288 LOGICAL ACCUM, BMP22
289* ..
290* .. External Functions ..
291 DOUBLE PRECISION DLAMCH
292 EXTERNAL DLAMCH
293* ..
294* .. Intrinsic Functions ..
295*
296 INTRINSIC abs, dble, dconjg, dimag, max, min, mod
297* ..
298* .. Local Arrays ..
299 COMPLEX*16 VT( 3 )
300* ..
301* .. External Subroutines ..
302 EXTERNAL zgemm, zlacpy, zlaqr1, zlarfg, zlaset,
303 $ ztrmm
304* ..
305* .. Statement Functions ..
306 DOUBLE PRECISION CABS1
307* ..
308* .. Statement Function definitions ..
309 cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
310* ..
311* .. Executable Statements ..
312*
313* ==== If there are no shifts, then there is nothing to do. ====
314*
315 IF( nshfts.LT.2 )
316 $ RETURN
317*
318* ==== If the active block is empty or 1-by-1, then there
319* . is nothing to do. ====
320*
321 IF( ktop.GE.kbot )
322 $ RETURN
323*
324* ==== NSHFTS is supposed to be even, but if it is odd,
325* . then simply reduce it by one. ====
326*
327 ns = nshfts - mod( nshfts, 2 )
328*
329* ==== Machine constants for deflation ====
330*
331 safmin = dlamch( 'SAFE MINIMUM' )
332 safmax = rone / safmin
333 ulp = dlamch( 'PRECISION' )
334 smlnum = safmin*( dble( n ) / ulp )
335*
336* ==== Use accumulated reflections to update far-from-diagonal
337* . entries ? ====
338*
339 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
340*
341* ==== clear trash ====
342*
343 IF( ktop+2.LE.kbot )
344 $ h( ktop+2, ktop ) = zero
345*
346* ==== NBMPS = number of 2-shift bulges in the chain ====
347*
348 nbmps = ns / 2
349*
350* ==== KDU = width of slab ====
351*
352 kdu = 4*nbmps
353*
354* ==== Create and chase chains of NBMPS bulges ====
355*
356 DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
357*
358* JTOP = Index from which updates from the right start.
359*
360 IF( accum ) THEN
361 jtop = max( ktop, incol )
362 ELSE IF( wantt ) THEN
363 jtop = 1
364 ELSE
365 jtop = ktop
366 END IF
367*
368 ndcol = incol + kdu
369 IF( accum )
370 $ CALL zlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
371*
372* ==== Near-the-diagonal bulge chase. The following loop
373* . performs the near-the-diagonal part of a small bulge
374* . multi-shift QR sweep. Each 4*NBMPS column diagonal
375* . chunk extends from column INCOL to column NDCOL
376* . (including both column INCOL and column NDCOL). The
377* . following loop chases a 2*NBMPS+1 column long chain of
378* . NBMPS bulges 2*NBMPS columns to the right. (INCOL
379* . may be less than KTOP and and NDCOL may be greater than
380* . KBOT indicating phantom columns from which to chase
381* . bulges before they are actually introduced or to which
382* . to chase bulges beyond column KBOT.) ====
383*
384 DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
385*
386* ==== Bulges number MTOP to MBOT are active double implicit
387* . shift bulges. There may or may not also be small
388* . 2-by-2 bulge, if there is room. The inactive bulges
389* . (if any) must wait until the active bulges have moved
390* . down the diagonal to make room. The phantom matrix
391* . paradigm described above helps keep track. ====
392*
393 mtop = max( 1, ( ktop-krcol ) / 2+1 )
394 mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
395 m22 = mbot + 1
396 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
397 $ ( kbot-2 )
398*
399* ==== Generate reflections to chase the chain right
400* . one column. (The minimum value of K is KTOP-1.) ====
401*
402 IF ( bmp22 ) THEN
403*
404* ==== Special case: 2-by-2 reflection at bottom treated
405* . separately ====
406*
407 k = krcol + 2*( m22-1 )
408 IF( k.EQ.ktop-1 ) THEN
409 CALL zlaqr1( 2, h( k+1, k+1 ), ldh, s( 2*m22-1 ),
410 $ s( 2*m22 ), v( 1, m22 ) )
411 beta = v( 1, m22 )
412 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
413 ELSE
414 beta = h( k+1, k )
415 v( 2, m22 ) = h( k+2, k )
416 CALL zlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
417 h( k+1, k ) = beta
418 h( k+2, k ) = zero
419 END IF
420
421*
422* ==== Perform update from right within
423* . computational window. ====
424*
425 t1 = v( 1, m22 )
426 t2 = t1*dconjg( v( 2, m22 ) )
427 DO 30 j = jtop, min( kbot, k+3 )
428 refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
429 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
430 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
431 30 CONTINUE
432*
433* ==== Perform update from left within
434* . computational window. ====
435*
436 IF( accum ) THEN
437 jbot = min( ndcol, kbot )
438 ELSE IF( wantt ) THEN
439 jbot = n
440 ELSE
441 jbot = kbot
442 END IF
443 t1 = dconjg( v( 1, m22 ) )
444 t2 = t1*v( 2, m22 )
445 DO 40 j = k+1, jbot
446 refsum = h( k+1, j ) +
447 $ dconjg( v( 2, m22 ) )*h( k+2, j )
448 h( k+1, j ) = h( k+1, j ) - refsum*t1
449 h( k+2, j ) = h( k+2, j ) - refsum*t2
450 40 CONTINUE
451*
452* ==== The following convergence test requires that
453* . the tradition small-compared-to-nearby-diagonals
454* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
455* . criteria both be satisfied. The latter improves
456* . accuracy in some examples. Falling back on an
457* . alternate convergence criterion when TST1 or TST2
458* . is zero (as done here) is traditional but probably
459* . unnecessary. ====
460*
461 IF( k.GE.ktop ) THEN
462 IF( h( k+1, k ).NE.zero ) THEN
463 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
464 IF( tst1.EQ.rzero ) THEN
465 IF( k.GE.ktop+1 )
466 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
467 IF( k.GE.ktop+2 )
468 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
469 IF( k.GE.ktop+3 )
470 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
471 IF( k.LE.kbot-2 )
472 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
473 IF( k.LE.kbot-3 )
474 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
475 IF( k.LE.kbot-4 )
476 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
477 END IF
478 IF( cabs1( h( k+1, k ) )
479 $ .LE.max( smlnum, ulp*tst1 ) ) THEN
480 h12 = max( cabs1( h( k+1, k ) ),
481 $ cabs1( h( k, k+1 ) ) )
482 h21 = min( cabs1( h( k+1, k ) ),
483 $ cabs1( h( k, k+1 ) ) )
484 h11 = max( cabs1( h( k+1, k+1 ) ),
485 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
486 h22 = min( cabs1( h( k+1, k+1 ) ),
487 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
488 scl = h11 + h12
489 tst2 = h22*( h11 / scl )
490*
491 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
492 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
493 END IF
494 END IF
495 END IF
496*
497* ==== Accumulate orthogonal transformations. ====
498*
499 IF( accum ) THEN
500 kms = k - incol
501 DO 50 j = max( 1, ktop-incol ), kdu
502 refsum = v( 1, m22 )*( u( j, kms+1 )+
503 $ v( 2, m22 )*u( j, kms+2 ) )
504 u( j, kms+1 ) = u( j, kms+1 ) - refsum
505 u( j, kms+2 ) = u( j, kms+2 ) -
506 $ refsum*dconjg( v( 2, m22 ) )
507 50 CONTINUE
508 ELSE IF( wantz ) THEN
509 DO 60 j = iloz, ihiz
510 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
511 $ z( j, k+2 ) )
512 z( j, k+1 ) = z( j, k+1 ) - refsum
513 z( j, k+2 ) = z( j, k+2 ) -
514 $ refsum*dconjg( v( 2, m22 ) )
515 60 CONTINUE
516 END IF
517 END IF
518*
519* ==== Normal case: Chain of 3-by-3 reflections ====
520*
521 DO 80 m = mbot, mtop, -1
522 k = krcol + 2*( m-1 )
523 IF( k.EQ.ktop-1 ) THEN
524 CALL zlaqr1( 3, h( ktop, ktop ), ldh, s( 2*m-1 ),
525 $ s( 2*m ), v( 1, m ) )
526 alpha = v( 1, m )
527 CALL zlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
528 ELSE
529*
530* ==== Perform delayed transformation of row below
531* . Mth bulge. Exploit fact that first two elements
532* . of row are actually zero. ====
533*
534 t1 = v( 1, m )
535 t2 = t1*dconjg( v( 2, m ) )
536 t3 = t1*dconjg( v( 3, m ) )
537 refsum = v( 3, m )*h( k+3, k+2 )
538 h( k+3, k ) = -refsum*t1
539 h( k+3, k+1 ) = -refsum*t2
540 h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*t3
541*
542* ==== Calculate reflection to move
543* . Mth bulge one step. ====
544*
545 beta = h( k+1, k )
546 v( 2, m ) = h( k+2, k )
547 v( 3, m ) = h( k+3, k )
548 CALL zlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
549*
550* ==== A Bulge may collapse because of vigilant
551* . deflation or destructive underflow. In the
552* . underflow case, try the two-small-subdiagonals
553* . trick to try to reinflate the bulge. ====
554*
555 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
556 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
557*
558* ==== Typical case: not collapsed (yet). ====
559*
560 h( k+1, k ) = beta
561 h( k+2, k ) = zero
562 h( k+3, k ) = zero
563 ELSE
564*
565* ==== Atypical case: collapsed. Attempt to
566* . reintroduce ignoring H(K+1,K) and H(K+2,K).
567* . If the fill resulting from the new
568* . reflector is too large, then abandon it.
569* . Otherwise, use the new one. ====
570*
571 CALL zlaqr1( 3, h( k+1, k+1 ), ldh, s( 2*m-1 ),
572 $ s( 2*m ), vt )
573 alpha = vt( 1 )
574 CALL zlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
575 t1 = dconjg( vt( 1 ) )
576 t2 = t1*vt( 2 )
577 t3 = t1*vt( 3 )
578 refsum = h( k+1, k )+dconjg( vt( 2 ) )*h( k+2, k )
579*
580 IF( cabs1( h( k+2, k )-refsum*t2 )+
581 $ cabs1( refsum*t3 ).GT.ulp*
582 $ ( cabs1( h( k, k ) )+cabs1( h( k+1,
583 $ k+1 ) )+cabs1( h( k+2, k+2 ) ) ) ) THEN
584*
585* ==== Starting a new bulge here would
586* . create non-negligible fill. Use
587* . the old one with trepidation. ====
588*
589 h( k+1, k ) = beta
590 h( k+2, k ) = zero
591 h( k+3, k ) = zero
592 ELSE
593*
594* ==== Starting a new bulge here would
595* . create only negligible fill.
596* . Replace the old reflector with
597* . the new one. ====
598*
599 h( k+1, k ) = h( k+1, k ) - refsum*t1
600 h( k+2, k ) = zero
601 h( k+3, k ) = zero
602 v( 1, m ) = vt( 1 )
603 v( 2, m ) = vt( 2 )
604 v( 3, m ) = vt( 3 )
605 END IF
606 END IF
607 END IF
608*
609* ==== Apply reflection from the right and
610* . the first column of update from the left.
611* . These updates are required for the vigilant
612* . deflation check. We still delay most of the
613* . updates from the left for efficiency. ====
614*
615 t1 = v( 1, m )
616 t2 = t1*dconjg( v( 2, m ) )
617 t3 = t1*dconjg( v( 3, m ) )
618 DO 70 j = jtop, min( kbot, k+3 )
619 refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
620 $ + v( 3, m )*h( j, k+3 )
621 h( j, k+1 ) = h( j, k+1 ) - refsum*t1
622 h( j, k+2 ) = h( j, k+2 ) - refsum*t2
623 h( j, k+3 ) = h( j, k+3 ) - refsum*t3
624 70 CONTINUE
625*
626* ==== Perform update from left for subsequent
627* . column. ====
628*
629 t1 = dconjg( v( 1, m ) )
630 t2 = t1*v( 2, m )
631 t3 = t1*v( 3, m )
632 refsum = h( k+1, k+1 )
633 $ + dconjg( v( 2, m ) )*h( k+2, k+1 )
634 $ + dconjg( v( 3, m ) )*h( k+3, k+1 )
635 h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
636 h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
637 h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
638*
639* ==== The following convergence test requires that
640* . the tradition small-compared-to-nearby-diagonals
641* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
642* . criteria both be satisfied. The latter improves
643* . accuracy in some examples. Falling back on an
644* . alternate convergence criterion when TST1 or TST2
645* . is zero (as done here) is traditional but probably
646* . unnecessary. ====
647*
648 IF( k.LT.ktop)
649 $ cycle
650 IF( h( k+1, k ).NE.zero ) THEN
651 tst1 = cabs1( h( k, k ) ) + cabs1( h( k+1, k+1 ) )
652 IF( tst1.EQ.rzero ) THEN
653 IF( k.GE.ktop+1 )
654 $ tst1 = tst1 + cabs1( h( k, k-1 ) )
655 IF( k.GE.ktop+2 )
656 $ tst1 = tst1 + cabs1( h( k, k-2 ) )
657 IF( k.GE.ktop+3 )
658 $ tst1 = tst1 + cabs1( h( k, k-3 ) )
659 IF( k.LE.kbot-2 )
660 $ tst1 = tst1 + cabs1( h( k+2, k+1 ) )
661 IF( k.LE.kbot-3 )
662 $ tst1 = tst1 + cabs1( h( k+3, k+1 ) )
663 IF( k.LE.kbot-4 )
664 $ tst1 = tst1 + cabs1( h( k+4, k+1 ) )
665 END IF
666 IF( cabs1( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
667 $ THEN
668 h12 = max( cabs1( h( k+1, k ) ),
669 $ cabs1( h( k, k+1 ) ) )
670 h21 = min( cabs1( h( k+1, k ) ),
671 $ cabs1( h( k, k+1 ) ) )
672 h11 = max( cabs1( h( k+1, k+1 ) ),
673 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
674 h22 = min( cabs1( h( k+1, k+1 ) ),
675 $ cabs1( h( k, k )-h( k+1, k+1 ) ) )
676 scl = h11 + h12
677 tst2 = h22*( h11 / scl )
678*
679 IF( tst2.EQ.rzero .OR. h21*( h12 / scl ).LE.
680 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
681 END IF
682 END IF
683 80 CONTINUE
684*
685* ==== Multiply H by reflections from the left ====
686*
687 IF( accum ) THEN
688 jbot = min( ndcol, kbot )
689 ELSE IF( wantt ) THEN
690 jbot = n
691 ELSE
692 jbot = kbot
693 END IF
694*
695 DO 100 m = mbot, mtop, -1
696 k = krcol + 2*( m-1 )
697 t1 = dconjg( v( 1, m ) )
698 t2 = t1*v( 2, m )
699 t3 = t1*v( 3, m )
700 DO 90 j = max( ktop, krcol + 2*m ), jbot
701 refsum = h( k+1, j ) + dconjg( v( 2, m ) )*h( k+2, j )
702 $ + dconjg( v( 3, m ) )*h( k+3, j )
703 h( k+1, j ) = h( k+1, j ) - refsum*t1
704 h( k+2, j ) = h( k+2, j ) - refsum*t2
705 h( k+3, j ) = h( k+3, j ) - refsum*t3
706 90 CONTINUE
707 100 CONTINUE
708*
709* ==== Accumulate orthogonal transformations. ====
710*
711 IF( accum ) THEN
712*
713* ==== Accumulate U. (If needed, update Z later
714* . with an efficient matrix-matrix
715* . multiply.) ====
716*
717 DO 120 m = mbot, mtop, -1
718 k = krcol + 2*( m-1 )
719 kms = k - incol
720 i2 = max( 1, ktop-incol )
721 i2 = max( i2, kms-(krcol-incol)+1 )
722 i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
723 t1 = v( 1, m )
724 t2 = t1*dconjg( v( 2, m ) )
725 t3 = t1*dconjg( v( 3, m ) )
726 DO 110 j = i2, i4
727 refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
728 $ + v( 3, m )*u( j, kms+3 )
729 u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
730 u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
731 u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
732 110 CONTINUE
733 120 CONTINUE
734 ELSE IF( wantz ) THEN
735*
736* ==== U is not accumulated, so update Z
737* . now by multiplying by reflections
738* . from the right. ====
739*
740 DO 140 m = mbot, mtop, -1
741 k = krcol + 2*( m-1 )
742 t1 = v( 1, m )
743 t2 = t1*dconjg( v( 2, m ) )
744 t3 = t1*dconjg( v( 3, m ) )
745 DO 130 j = iloz, ihiz
746 refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
747 $ + v( 3, m )*z( j, k+3 )
748 z( j, k+1 ) = z( j, k+1 ) - refsum*t1
749 z( j, k+2 ) = z( j, k+2 ) - refsum*t2
750 z( j, k+3 ) = z( j, k+3 ) - refsum*t3
751 130 CONTINUE
752 140 CONTINUE
753 END IF
754*
755* ==== End of near-the-diagonal bulge chase. ====
756*
757 145 CONTINUE
758*
759* ==== Use U (if accumulated) to update far-from-diagonal
760* . entries in H. If required, use U to update Z as
761* . well. ====
762*
763 IF( accum ) THEN
764 IF( wantt ) THEN
765 jtop = 1
766 jbot = n
767 ELSE
768 jtop = ktop
769 jbot = kbot
770 END IF
771 k1 = max( 1, ktop-incol )
772 nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
773*
774* ==== Horizontal Multiply ====
775*
776 DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
777 jlen = min( nh, jbot-jcol+1 )
778 CALL zgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
779 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
780 $ ldwh )
781 CALL zlacpy( 'ALL', nu, jlen, wh, ldwh,
782 $ h( incol+k1, jcol ), ldh )
783 150 CONTINUE
784*
785* ==== Vertical multiply ====
786*
787 DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
788 jlen = min( nv, max( ktop, incol )-jrow )
789 CALL zgemm( 'N', 'N', jlen, nu, nu, one,
790 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
791 $ ldu, zero, wv, ldwv )
792 CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
793 $ h( jrow, incol+k1 ), ldh )
794 160 CONTINUE
795*
796* ==== Z multiply (also vertical) ====
797*
798 IF( wantz ) THEN
799 DO 170 jrow = iloz, ihiz, nv
800 jlen = min( nv, ihiz-jrow+1 )
801 CALL zgemm( 'N', 'N', jlen, nu, nu, one,
802 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
803 $ ldu, zero, wv, ldwv )
804 CALL zlacpy( 'ALL', jlen, nu, wv, ldwv,
805 $ z( jrow, incol+k1 ), ldz )
806 170 CONTINUE
807 END IF
808 END IF
809 180 CONTINUE
810*
811* ==== End of ZLAQR5 ====
812*
813 END
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:188
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:101
subroutine zlaqr1(n, h, ldh, s1, s2, v)
ZLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition zlaqr1.f:105
subroutine zlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, s, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
ZLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition zlaqr5.f:256
subroutine zlarfg(n, alpha, x, incx, tau)
ZLARFG generates an elementary reflector (Householder matrix).
Definition zlarfg.f:104
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:104
subroutine ztrmm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRMM
Definition ztrmm.f:177