SCALAPACK 2.2.2
dlaqr6.f
1 SUBROUTINE dlaqr6( JOB, WANTT, WANTZ, KACC22, N, KTOP, KBOT,
2 $ NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ,
3 $ V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH )
4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK auxiliary routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 CHARACTER JOB
16 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
17 $ ldwh, ldwv, ldz, n, nh, nshfts, nv
18 LOGICAL WANTT, WANTZ
19* ..
20* .. Array Arguments ..
21 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
22 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
23 $ z( ldz, * )
24* ..
25*
26* This auxiliary subroutine called by PDLAQR5 performs a
27* single small-bulge multi-shift QR sweep, moving the chain
28* of bulges from top to bottom in the submatrix
29* H(KTOP:KBOT,KTOP:KBOT), collecting the transformations in the
30* matrix U *or* accumulating the transformations in the matrix
31* Z (see below).
32*
33* This is a modified version of DLAQR5 from LAPACK 3.1.
34*
35* ======================================================================
36*
37* JOB (input) character scalar
38* Set the kind of job to do in DLAQR6, as follows:
39* JOB = 'I': Introduce and chase bulges in submatrix
40* JOB = 'C': Chase bulges from top to bottom of submatrix
41* JOB = 'O': Chase bulges off submatrix
* JOB = 'A': Do all of the above: introduce, chase, and
* chase bulges off the submatrix in one call
42*
43* WANTT (input) logical scalar
44* WANTT = .true. if the quasi-triangular Schur factor
45* is being computed. WANTT is set to .false. otherwise.
46*
47* WANTZ (input) logical scalar
48* WANTZ = .true. if the orthogonal Schur factor is being
49* computed. WANTZ is set to .false. otherwise.
50*
51* KACC22 (input) integer with value 0, 1, or 2.
52* Specifies the computation mode of far-from-diagonal
53* orthogonal updates.
54* = 0: DLAQR6 does not accumulate reflections and does not
55* use matrix-matrix multiply to update far-from-diagonal
56* matrix entries.
57* = 1: DLAQR6 accumulates reflections and uses matrix-matrix
58* multiply to update the far-from-diagonal matrix entries.
59* = 2: DLAQR6 accumulates reflections, uses matrix-matrix
60* multiply to update the far-from-diagonal matrix entries,
61* and takes advantage of 2-by-2 block structure during
62* matrix multiplies.
63*
64* N (input) integer scalar
65* N is the order of the Hessenberg matrix H upon which this
66* subroutine operates.
67*
68* KTOP (input) integer scalar
69* KBOT (input) integer scalar
70* These are the first and last rows and columns of an
71* isolated diagonal block upon which the QR sweep is to be
72* applied. It is assumed without a check that
73* either KTOP = 1 or H(KTOP,KTOP-1) = 0
74* and
75* either KBOT = N or H(KBOT+1,KBOT) = 0.
76*
77* NSHFTS (input) integer scalar
78* NSHFTS gives the number of simultaneous shifts. NSHFTS
79* must be positive and even.
80*
81* SR (input) DOUBLE PRECISION array of size (NSHFTS)
82* SI (input) DOUBLE PRECISION array of size (NSHFTS)
83* SR contains the real parts and SI contains the imaginary
84* parts of the NSHFTS shifts of origin that define the
85* multi-shift QR sweep.
86*
87* H (input/output) DOUBLE PRECISION array of size (LDH,N)
88* On input H contains a Hessenberg matrix. On output a
89* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
90* to the isolated diagonal block in rows and columns KTOP
91* through KBOT.
92*
93* LDH (input) integer scalar
94* LDH is the leading dimension of H just as declared in the
95* calling procedure. LDH.GE.MAX(1,N).
96*
97* ILOZ (input) INTEGER
98* IHIZ (input) INTEGER
99* Specify the rows of Z to which transformations must be
100* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
101*
102* Z (input/output) DOUBLE PRECISION array of size (LDZ,N)
103* If WANTZ = .TRUE., then the QR sweep orthogonal
104* similarity transformation is accumulated into
105* Z(ILOZ:IHIZ,KTOP:KBOT) from the right.
106* If WANTZ = .FALSE., then Z is unreferenced.
107*
108* LDZ (input) integer scalar
109* LDZ is the leading dimension of Z just as declared in
110* the calling procedure. LDZ.GE.N.
111*
112* V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2)
113*
114* LDV (input) integer scalar
115* LDV is the leading dimension of V as declared in the
116* calling procedure. LDV.GE.3.
117*
118* U (workspace) DOUBLE PRECISION array of size
119* (LDU,3*NSHFTS-3)
120*
121* LDU (input) integer scalar
122* LDU is the leading dimension of U just as declared in
123* the calling subroutine. LDU.GE.3*NSHFTS-3.
124*
125* NH (input) integer scalar
126* NH is the number of columns in array WH available for
127* workspace. NH.GE.1 is required for this workspace to
128* be used; otherwise the far-from-diagonal elements are
129* updated without level 3 BLAS.
130*
131* WH (workspace) DOUBLE PRECISION array of size (LDWH,NH)
132*
133* LDWH (input) integer scalar
134* Leading dimension of WH just as declared in the
135* calling procedure. LDWH.GE.3*NSHFTS-3.
136*
137* NV (input) integer scalar
138* NV is the number of rows in WV available for workspace.
139* NV.GE.1 is required for this workspace to be used;
140* otherwise the far-from-diagonal elements are updated
141* without level 3 BLAS.
142*
143* WV (workspace) DOUBLE PRECISION array of size
144* (LDWV,3*NSHFTS-3)
145*
146* LDWV (input) integer scalar
147* LDWV is the leading dimension of WV as declared in
148* the calling subroutine. LDWV.GE.NV.
149*
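*
* ==== Illustrative usage (editorial sketch, not part of the
* . original source; the values chosen are assumptions). A
* . single call that introduces, chases, and chases off a
* . chain of bulges in H(KTOP:KBOT,KTOP:KBOT), accumulating
* . reflections (KACC22 = 2) and updating Z over rows 1:N,
* . might look like:
* .
* . CALL DLAQR6( 'A', .TRUE., .TRUE., 2, N, KTOP, KBOT,
* . $ NSHFTS, SR, SI, H, LDH, 1, N, Z, LDZ,
* . $ V, LDV, U, LDU, NV, WV, LDWV, NH, WH,
* . $ LDWH )
* .
* . with LDV.GE.3, LDU.GE.3*NSHFTS-3, LDWH.GE.3*NSHFTS-3 and
* . LDWV.GE.NV, as described above. ====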
150*
151* ================================================================
152* Based on contributions by
153* Karen Braman and Ralph Byers, Department of Mathematics,
154* University of Kansas, USA
155*
156* Robert Granat, Department of Computing Science and HPC2N,
157* Umea University, Sweden
158*
159* ============================================================
160* Reference:
161*
162* K. Braman, R. Byers and R. Mathias, The Multishift QR
163* Algorithm. Part I: Maintaining Well-Focused Shifts and
164* Level 3 Performance, SIAM Journal on Matrix Analysis
165* and Applications, volume 23, pages 929--947, 2002.
166*
167* ============================================================
168* .. Parameters ..
169 DOUBLE PRECISION ZERO, ONE
170 PARAMETER ( ZERO = 0.0d0, one = 1.0d0 )
171* ..
172* .. Local Scalars ..
173 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
174 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
175 $ ulp
176 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
177 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
178 $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
179 $ ns, nu, sincol, eincol, uincol, iphv, chunk,
180 $ threads, jlen2, jcol2, gchunk, jrow2, maxchunk
181 LOGICAL ACCUM, BLK22, BMP22, INTRO, CHASE, OFF, ALL
182* ..
183* .. External Functions ..
184 LOGICAL LSAME
185 INTEGER PILAENVX
186 DOUBLE PRECISION DLAMCH
187 EXTERNAL lsame, dlamch, pilaenvx
188* ..
189* .. Intrinsic Functions ..
190*
191 INTRINSIC abs, dble, max, min, mod
192* ..
193* .. Local Arrays ..
194 DOUBLE PRECISION VT( 3 )
195* ..
196* .. External Subroutines ..
197 EXTERNAL dgemm, dlabad, dlamov, dlaqr1, dlarfg, dlaset,
198 $ dtrmm
199* ..
200* .. Executable Statements ..
201*
202* ==== If there are no shifts, then there is nothing to do. ====
203*
204 IF( nshfts.LT.2 )
205 $ RETURN
206*
207* ==== If the active block is empty or 1-by-1, then there
208* . is nothing to do. ====
209*
210 IF( ktop.GE.kbot )
211 $ RETURN
212 threads = 1
213*
214* ==== Shuffle shifts into pairs of real shifts and pairs
215* . of complex conjugate shifts assuming complex
216* . conjugate shifts are already adjacent to one
217* . another. ====
218*
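* ==== (Editorial example: if SI = ( 0, a, -a, 0 ) on entry,
* . the pass with I = 1 rotates entries 1:3 so that SI
* . becomes ( a, -a, 0, 0 ); the conjugate pair is then
* . adjacent and the two real shifts are paired at the
* . end.) ====
*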
219 DO 10 i = 1, nshfts - 2, 2
220 IF( si( i ).NE.-si( i+1 ) ) THEN
221*
222 swap = sr( i )
223 sr( i ) = sr( i+1 )
224 sr( i+1 ) = sr( i+2 )
225 sr( i+2 ) = swap
226*
227 swap = si( i )
228 si( i ) = si( i+1 )
229 si( i+1 ) = si( i+2 )
230 si( i+2 ) = swap
231 END IF
232 10 CONTINUE
233*
234* ==== NSHFTS is supposed to be even, but if it is odd,
235* . then simply reduce it by one. The shuffle above
236* . ensures that the dropped shift is real and that
237* . the remaining shifts are paired. ====
238*
239 ns = nshfts - mod( nshfts, 2 )
240*
241* ==== Machine constants for deflation ====
242*
243 safmin = dlamch( 'SAFE MINIMUM' )
244 safmax = one / safmin
245 CALL dlabad( safmin, safmax )
246 ulp = dlamch( 'PRECISION' )
247 smlnum = safmin*( dble( n ) / ulp )
248*
249* ==== Use accumulated reflections to update far-from-diagonal
250* . entries? This is performed only if both NH and NV are
251* . at least 1. ====
252*
253 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
254 accum = accum .AND. nh.GE.1 .AND. nv.GE.1
255*
256* ==== If so, exploit the 2-by-2 block structure? ====
257*
258 blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
259*
260* ==== Decode JOB ====
261*
262 all = lsame( job, 'A' )
263 IF( .NOT. all )
264 $ intro = lsame( job, 'I' )
265 IF( .NOT. all .AND. .NOT. intro )
266 $ chase = lsame( job, 'C' )
267 IF( .NOT. all .AND. .NOT. intro .AND. .NOT. chase ) THEN
268 off = lsame( job, 'O' )
269 IF( .NOT. off )
270 $ RETURN
271 END IF
272*
273* ==== clear trash ====
274*
275 IF( ( intro.OR.all ) .AND. ktop+2.LE.kbot )
276 $ h( ktop+2, ktop ) = zero
277*
278* ==== NBMPS = number of 2-shift bulges in the chain ====
279*
280 nbmps = ns / 2
281*
282* ==== KDU = width of slab ====
283*
284 kdu = 6*nbmps - 3
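*
* ==== (Editorial note: each diagonal slab INCOL:NDCOL below
* . generically spans KDU+1 = 6*NBMPS-2 columns, of which
* . the KDU columns INCOL+1:INCOL+KDU are transformed, so
* . the accumulated orthogonal factor U is set up as a
* . KDU-by-KDU matrix.) ====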
285*
286* Set loop limits for bulge-chasing depending on working mode
287*
288 IF( all ) THEN
289 sincol = 3*( 1-nbmps ) + ktop - 1
290 eincol = kbot - 2
291 uincol = 3*nbmps - 2
292 ELSEIF( intro ) THEN
293 sincol = 3*( 1-nbmps ) + ktop - 1
294 eincol = kbot - 3*nbmps - 1
295 uincol = 3*nbmps - 2
296 ELSEIF( chase ) THEN
297 sincol = ktop
298 eincol = kbot - 3*nbmps - 1
299 uincol = 3*nbmps - 2
300 ELSEIF( off ) THEN
301 sincol = ktop
302 eincol = kbot - 2
303 uincol = 3*nbmps - 2
304 END IF
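*
* ==== (Editorial note, illustrative values: with NSHFTS = 4
* . shifts, i.e. NBMPS = 2 bulges, and JOB = 'A', this gives
* . SINCOL = KTOP-4, EINCOL = KBOT-2 and UINCOL = 4, so the
* . outer loop below starts on phantom columns to the left
* . of KTOP, advances 4 columns per slab, and stops once
* . the chain has been chased off the bottom of the
* . submatrix.) ====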
305 iphv = 0
306*
307* ==== Create and/or chase chains of NBMPS bulges ====
308*
309 DO 220 incol = sincol, eincol, uincol
310 ndcol = min( incol + kdu, eincol )
311 IF( accum )
312 $ CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
313*
314* ==== Near-the-diagonal bulge chase. The following loop
315* . performs the near-the-diagonal part of a small bulge
316* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
317* . chunk extends from column INCOL to column NDCOL
318* . (including both column INCOL and column NDCOL). The
319* . following loop chases a 3*NBMPS column long chain of
320* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
321* . may be less than KTOP and NDCOL may be greater than
322* . KBOT indicating phantom columns from which to chase
323* . bulges before they are actually introduced or to which
324* . to chase bulges beyond column KBOT.) ====
325*
326 DO 150 krcol = incol, min( eincol, incol+3*nbmps-3, kbot-2 )
327*
328* ==== Bulges number MTOP to MBOT are active double implicit
329* . shift bulges. There may or may not also be a small
330* . 2-by-2 bulge, if there is room. The inactive bulges
331* . (if any) must wait until the active bulges have moved
332* . down the diagonal to make room. The phantom matrix
333* . paradigm described above helps keep track. ====
334*
335 mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
336 mbot = min( nbmps, ( kbot-krcol ) / 3 )
337 m22 = mbot + 1
338 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
339 $ ( kbot-2 )
340*
341* ==== Generate reflections to chase the chain right
342* . one column. (The minimum value of K is KTOP-1.) ====
343*
344 DO 20 m = mtop, mbot
345 k = krcol + 3*( m-1 )
346 IF( k.EQ.ktop-1 ) THEN
347 CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
348 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
349 $ v( 1, m ) )
350 alpha = v( 1, m )
351 CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
352 ELSE
353 beta = h( k+1, k )
354 v( 2, m ) = h( k+2, k )
355 v( 3, m ) = h( k+3, k )
356 CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
357*
358* ==== A Bulge may collapse because of vigilant
359* . deflation or destructive underflow. In the
360* . underflow case, try the two-small-subdiagonals
361* . trick to try to reinflate the bulge. ====
362*
363 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
364 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
365*
366* ==== Typical case: not collapsed (yet). ====
367*
368 h( k+1, k ) = beta
369 h( k+2, k ) = zero
370 h( k+3, k ) = zero
371 ELSE
372*
373* ==== Atypical case: collapsed. Attempt to
374* . reintroduce ignoring H(K+1,K) and H(K+2,K).
375* . If the fill resulting from the new
376* . reflector is too large, then abandon it.
377* . Otherwise, use the new one. ====
378*
379 CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
380 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
381 $ vt )
382 alpha = vt( 1 )
383 CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
384 refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
385 $ h( k+2, k ) )
386*
387 IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
388 $ abs( refsum*vt( 3 ) ).GT.ulp*
389 $ ( abs( h( k, k ) )+abs( h( k+1,
390 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
391*
392* ==== Starting a new bulge here would
393* . create non-negligible fill. Use
394* . the old one with trepidation. ====
395*
396 h( k+1, k ) = beta
397 h( k+2, k ) = zero
398 h( k+3, k ) = zero
399 ELSE
400*
401* ==== Starting a new bulge here would
402* . create only negligible fill.
403* . Replace the old reflector with
404* . the new one. ====
405*
406 h( k+1, k ) = h( k+1, k ) - refsum
407 h( k+2, k ) = zero
408 h( k+3, k ) = zero
409 v( 1, m ) = vt( 1 )
410 v( 2, m ) = vt( 2 )
411 v( 3, m ) = vt( 3 )
412 END IF
413 END IF
414 END IF
415 20 CONTINUE
416*
417* ==== Generate a 2-by-2 reflection, if needed. ====
418*
419 k = krcol + 3*( m22-1 )
420 IF( bmp22 ) THEN
421 IF( k.EQ.ktop-1 ) THEN
422 CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
423 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
424 $ v( 1, m22 ) )
425 beta = v( 1, m22 )
426 CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
427 ELSE
428 beta = h( k+1, k )
429 v( 2, m22 ) = h( k+2, k )
430 CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
431 h( k+1, k ) = beta
432 h( k+2, k ) = zero
433 END IF
434 ELSE
435*
436* ==== Initialize V(1,M22) here to avoid possible undefined
437* . variable problems later. ====
438*
439 v( 1, m22 ) = zero
440 END IF
441*
442* ==== Multiply H by reflections from the left ====
443*
444 IF( accum ) THEN
445 jbot = min( max(incol+kdu,ndcol), kbot )
446 ELSE IF( wantt ) THEN
447 jbot = n
448 ELSE
449 jbot = kbot
450 END IF
451 DO 40 j = max( ktop, krcol ), jbot
452 mend = min( mbot, ( j-krcol+2 ) / 3 )
453 DO 30 m = mtop, mend
454 k = krcol + 3*( m-1 )
455 refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
456 $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
457 h( k+1, j ) = h( k+1, j ) - refsum
458 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
459 h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
460 30 CONTINUE
461 40 CONTINUE
462 IF( bmp22 ) THEN
463 k = krcol + 3*( m22-1 )
464 DO 50 j = max( k+1, ktop ), jbot
465 refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
466 $ h( k+2, j ) )
467 h( k+1, j ) = h( k+1, j ) - refsum
468 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
469 50 CONTINUE
470 END IF
471*
472* ==== Multiply H by reflections from the right.
473* . Delay filling in the last row until the
474* . vigilant deflation check is complete. ====
475*
476 IF( accum ) THEN
477 jtop = max( ktop, incol )
478 ELSE IF( wantt ) THEN
479 jtop = 1
480 ELSE
481 jtop = ktop
482 END IF
483 DO 90 m = mtop, mbot
484 IF( v( 1, m ).NE.zero ) THEN
485 k = krcol + 3*( m-1 )
486 DO 60 j = jtop, min( kbot, k+3 )
487 refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
488 $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
489 h( j, k+1 ) = h( j, k+1 ) - refsum
490 h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
491 h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
492 60 CONTINUE
493*
494 IF( accum ) THEN
495*
496* ==== Accumulate U. (If necessary, update Z later
497* . with an efficient matrix-matrix
498* . multiply.) ====
499*
500 kms = k - incol
501 DO 70 j = max( 1, ktop-incol ), kdu
502 refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
503 $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
504 u( j, kms+1 ) = u( j, kms+1 ) - refsum
505 u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
506 u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
507 70 CONTINUE
508 ELSE IF( wantz ) THEN
509*
510* ==== U is not accumulated, so update Z
511* . now by multiplying by reflections
512* . from the right. ====
513*
514 DO 80 j = iloz, ihiz
515 refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
516 $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
517 z( j, k+1 ) = z( j, k+1 ) - refsum
518 z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
519 z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
520 80 CONTINUE
521 END IF
522 END IF
523 90 CONTINUE
524*
525* ==== Special case: 2-by-2 reflection (if needed) ====
526*
527 k = krcol + 3*( m22-1 )
528 IF( bmp22 ) THEN
529 IF( v( 1, m22 ).NE.zero ) THEN
530 DO 100 j = jtop, min( kbot, k+3 )
531 refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
532 $ h( j, k+2 ) )
533 h( j, k+1 ) = h( j, k+1 ) - refsum
534 h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
535 100 CONTINUE
536*
537 IF( accum ) THEN
538 kms = k - incol
539 DO 110 j = max( 1, ktop-incol ), kdu
540 refsum = v( 1, m22 )*( u( j, kms+1 ) +
541 $ v( 2, m22 )*u( j, kms+2 ) )
542 u( j, kms+1 ) = u( j, kms+1 ) - refsum
543 u( j, kms+2 ) = u( j, kms+2 ) -
544 $ refsum*v( 2, m22 )
545 110 CONTINUE
546 ELSE IF( wantz ) THEN
547 DO 120 j = iloz, ihiz
548 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
549 $ z( j, k+2 ) )
550 z( j, k+1 ) = z( j, k+1 ) - refsum
551 z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
552 120 CONTINUE
553 END IF
554 END IF
555 END IF
556*
557* ==== Vigilant deflation check ====
558*
559 mstart = mtop
560 IF( krcol+3*( mstart-1 ).LT.ktop )
561 $ mstart = mstart + 1
562 mend = mbot
563 IF( bmp22 )
564 $ mend = mend + 1
565 IF( krcol.EQ.kbot-2 )
566 $ mend = mend + 1
567 DO 130 m = mstart, mend
568 k = min( kbot-1, krcol+3*( m-1 ) )
569*
570* ==== The following convergence test requires that
571* . the traditional small-compared-to-nearby-diagonals
572* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
573* . criteria both be satisfied. The latter improves
574* . accuracy in some examples. Falling back on an
575* . alternate convergence criterion when TST1 or TST2
576* . is zero (as done here) is traditional but probably
577* . unnecessary. ====
578*
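* ==== (Editorial summary of the tests below: H(K+1,K) is
* . set to zero only when
* . ABS( H(K+1,K) ).LE.MAX( SMLNUM, ULP*TST1 )
* . and, in terms of the quantities computed below,
* . TST2.EQ.ZERO or
* . H21*( H12/SCL ).LE.MAX( SMLNUM, ULP*TST2 ),
* . the latter being the Ahues-Tisseur test.) ====
*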
579 IF( h( k+1, k ).NE.zero ) THEN
580 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
581 IF( tst1.EQ.zero ) THEN
582 IF( k.GE.ktop+1 )
583 $ tst1 = tst1 + abs( h( k, k-1 ) )
584 IF( k.GE.ktop+2 )
585 $ tst1 = tst1 + abs( h( k, k-2 ) )
586 IF( k.GE.ktop+3 )
587 $ tst1 = tst1 + abs( h( k, k-3 ) )
588 IF( k.LE.kbot-2 )
589 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
590 IF( k.LE.kbot-3 )
591 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
592 IF( k.LE.kbot-4 )
593 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
594 END IF
595 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
596 $ THEN
597 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
598 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
599 h11 = max( abs( h( k+1, k+1 ) ),
600 $ abs( h( k, k )-h( k+1, k+1 ) ) )
601 h22 = min( abs( h( k+1, k+1 ) ),
602 $ abs( h( k, k )-h( k+1, k+1 ) ) )
603 scl = h11 + h12
604 tst2 = h22*( h11 / scl )
605*
606 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
607 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
608 END IF
609 END IF
610 130 CONTINUE
611*
612* ==== Fill in the last row of each bulge. ====
613*
614 mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
615 DO 140 m = mtop, mend
616 k = krcol + 3*( m-1 )
617 refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
618 h( k+4, k+1 ) = -refsum
619 h( k+4, k+2 ) = -refsum*v( 2, m )
620 h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
621 140 CONTINUE
622*
623* ==== End of near-the-diagonal bulge chase. ====
624*
625 150 CONTINUE
626*
627* ==== Use U (if accumulated) to update far-from-diagonal
628* . entries in H. If required, use U to update Z as
629* . well. ====
630*
631 IF( accum ) THEN
632 IF( wantt ) THEN
633 jtop = 1
634 jbot = n
635 ELSE
636 jtop = ktop
637 jbot = kbot
638 END IF
639 k1 = max( 1, ktop-incol )
640 nu = ( kdu-max( 0, max(incol+kdu,ndcol)-kbot ) ) - k1 + 1
641 IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
642 $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) .OR.
643 $ nu.LT.kdu ) THEN
644*
645* ==== Updates not exploiting the 2-by-2 block
646* . structure of U. K1 and NU keep track of
647* . the location and size of U in the special
648* . cases of introducing bulges and chasing
649* . bulges off the bottom. In these special
650* . cases and in case the number of shifts
651* . is NS = 2, there is no 2-by-2 block
652* . structure to exploit. ====
653*
654* ==== Horizontal Multiply ====
655*
656 DO 160 jcol = min(max(incol+kdu,ndcol),kbot)+ 1, jbot, nh
657 jlen = min( nh, jbot-jcol+1 )
658 CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
659 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
660 $ ldwh )
661 CALL dlamov( 'ALL', nu, jlen, wh, ldwh,
662 $ h( incol+k1, jcol ), ldh )
663 160 CONTINUE
664*
665* ==== Vertical multiply ====
666*
667 DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
668 jlen = min( nv, max( ktop, incol )-jrow )
669 CALL dgemm( 'N', 'N', jlen, nu, nu, one,
670 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
671 $ ldu, zero, wv, ldwv )
672 CALL dlamov( 'ALL', jlen, nu, wv, ldwv,
673 $ h( jrow, incol+k1 ), ldh )
674 170 CONTINUE
675*
676* ==== Z multiply (also vertical) ====
677*
678 IF( wantz ) THEN
679 DO 180 jrow = iloz, ihiz, nv
680 jlen = min( nv, ihiz-jrow+1 )
681 CALL dgemm( 'N', 'N', jlen, nu, nu, one,
682 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
683 $ ldu, zero, wv, ldwv )
684 CALL dlamov( 'ALL', jlen, nu, wv, ldwv,
685 $ z( jrow, incol+k1 ), ldz )
686 180 CONTINUE
687 END IF
688 ELSE
689*
690* ==== Updates exploiting U's 2-by-2 block structure.
691* . (I2, I4, J2, J4 are the last rows and columns
692* . of the blocks.) ====
693*
694 i2 = ( kdu+1 ) / 2
695 i4 = kdu
696 j2 = i4 - i2
697 j4 = kdu
698*
699* ==== KZS and KNZ deal with the band of zeros
700* . along the diagonal of one of the triangular
701* . blocks. ====
702*
703 kzs = ( j4-j2 ) - ( ns+1 )
704 knz = ns + 1
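*
* ==== (Editorial note, illustrative values: with NS = 4
* . shifts the above gives KDU = 9, I2 = 5, I4 = 9,
* . J2 = 4, J4 = 9, KZS = 0 and KNZ = 5.) ====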
705*
706* ==== Horizontal multiply ====
707*
708 DO 190 jcol = min(max(incol+kdu,ndcol),kbot)+ 1, jbot, nh
709 jlen = min( nh, jbot-jcol+1 )
710*
711* ==== Copy bottom of H to top+KZS of scratch ====
712* (The first KZS rows get multiplied by zero.) ====
713*
714 CALL dlamov( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
715 $ ldh, wh( kzs+1, 1 ), ldwh )
716 CALL dlaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
717*
718* ==== Multiply by U21' ====
719*
720 CALL dtrmm( 'L', 'U', 'C', 'N', knz, jlen, one,
721 $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
722 $ ldwh )
723*
724* ==== Multiply top of H by U11' ====
725*
726 CALL dgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
727 $ h( incol+1, jcol ), ldh, one, wh, ldwh )
728*
729* ==== Copy top of H to bottom of WH ====
730*
731 CALL dlamov( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
732 $ wh( i2+1, 1 ), ldwh )
733*
734* ==== Multiply by U21' ====
735*
736 CALL dtrmm( 'L', 'L', 'C', 'N', j2, jlen, one,
737 $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
738*
739* ==== Multiply by U22 ====
740*
741 CALL dgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
742 $ u( j2+1, i2+1 ), ldu,
743 $ h( incol+1+j2, jcol ), ldh, one,
744 $ wh( i2+1, 1 ), ldwh )
745*
746* ==== Copy it back ====
747*
748 CALL dlamov( 'ALL', kdu, jlen, wh, ldwh,
749 $ h( incol+1, jcol ), ldh )
750 190 CONTINUE
751*
752* ==== Vertical multiply ====
753*
754 DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
755 jlen = min( nv, max( incol, ktop )-jrow )
756*
757* ==== Copy right of H to scratch (the first KZS
758* . columns get multiplied by zero) ====
759*
760 CALL dlamov( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
761 $ ldh, wv( 1, 1+kzs ), ldwv )
762 CALL dlaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
763*
764* ==== Multiply by U21 ====
765*
766 CALL dtrmm( 'R', 'U', 'N', 'N', jlen, knz, one,
767 $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
768 $ ldwv )
769*
770* ==== Multiply by U11 ====
771*
772 CALL dgemm( 'N', 'N', jlen, i2, j2, one,
773 $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
774 $ ldwv )
775*
776* ==== Copy left of H to right of scratch ====
777*
778 CALL dlamov( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
779 $ wv( 1, 1+i2 ), ldwv )
780*
781* ==== Multiply by U21 ====
782*
783 CALL dtrmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
784 $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
785*
786* ==== Multiply by U22 ====
787*
788 CALL dgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
789 $ h( jrow, incol+1+j2 ), ldh,
790 $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
791 $ ldwv )
792*
793* ==== Copy it back ====
794*
795 CALL dlamov( 'ALL', jlen, kdu, wv, ldwv,
796 $ h( jrow, incol+1 ), ldh )
797 200 CONTINUE
798*
799* ==== Multiply Z (also vertical) ====
800*
801 IF( wantz ) THEN
802 DO 210 jrow = iloz, ihiz, nv
803 jlen = min( nv, ihiz-jrow+1 )
804*
805* ==== Copy right of Z to left of scratch (first
806* . KZS columns get multiplied by zero) ====
807*
808 CALL dlamov( 'ALL', jlen, knz,
809 $ z( jrow, incol+1+j2 ), ldz,
810 $ wv( 1, 1+kzs ), ldwv )
811*
812* ==== Multiply by U12 ====
813*
814 CALL dlaset( 'ALL', jlen, kzs, zero, zero, wv,
815 $ ldwv )
816 CALL dtrmm( 'R', 'U', 'N', 'N', jlen, knz, one,
817 $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
818 $ ldwv )
819*
820* ==== Multiply by U11 ====
821*
822 CALL dgemm( 'N', 'N', jlen, i2, j2, one,
823 $ z( jrow, incol+1 ), ldz, u, ldu, one,
824 $ wv, ldwv )
825*
826* ==== Copy left of Z to right of scratch ====
827*
828 CALL dlamov( 'ALL', jlen, j2, z( jrow, incol+1 ),
829 $ ldz, wv( 1, 1+i2 ), ldwv )
830*
831* ==== Multiply by U21 ====
832*
833 CALL dtrmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
834 $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
835 $ ldwv )
836*
837* ==== Multiply by U22 ====
838*
839 CALL dgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
840 $ z( jrow, incol+1+j2 ), ldz,
841 $ u( j2+1, i2+1 ), ldu, one,
842 $ wv( 1, 1+i2 ), ldwv )
843*
844* ==== Copy the result back to Z ====
845*
846 CALL dlamov( 'ALL', jlen, kdu, wv, ldwv,
847 $ z( jrow, incol+1 ), ldz )
848 210 CONTINUE
849 END IF
850 END IF
851 END IF
852 220 CONTINUE
853*
854* ==== Clear out workspaces and return. ====
855*
856 IF( n.GE.5 )
857 $ CALL dlaset( 'Lower', n-4, n-4, zero, zero, h(5,1), ldh )
858*
859* ==== End of DLAQR6 ====
860*
861 END