SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
slaqr6.f
Go to the documentation of this file.
1 SUBROUTINE slaqr6( JOB, WANTT, WANTZ, KACC22, N, KTOP, KBOT,
2 $ NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ,
3 $ V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH )
4*
5* Contribution from the Department of Computing Science and HPC2N,
6* Umea University, Sweden
7*
8* -- ScaLAPACK auxiliary routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12 IMPLICIT NONE
13*
14* .. Scalar Arguments ..
15 CHARACTER JOB
16 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
17 $ ldwh, ldwv, ldz, n, nh, nshfts, nv
18 LOGICAL WANTT, WANTZ
19* ..
20* .. Array Arguments ..
21 REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
22 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
23 $ z( ldz, * )
24* ..
25*
26* This auxiliary subroutine called by PSLAQR5 performs a
27* single small-bulge multi-shift QR sweep, moving the chain
28* of bulges from top to bottom in the submatrix
29* H(KTOP:KBOT,KTOP:KBOT), collecting the transformations in the
30* matrix HV *or* accumulating the transformations in the matrix
31* Z (see below).
32*
33* This is a modified version of DLAQR5 from LAPACK 3.1.
34*
35* ======================================================================
36*
37* JOB (input) character scalar
38* Set the kind of job to do in SLAQR6, as follows:
39* JOB = 'I': Introduce and chase bulges in submatrix
40* JOB = 'C': Chase bulges from top to bottom of submatrix
41* JOB = 'O': Chase bulges off submatrix
42*
43* WANTT (input) logical scalar
44* WANTT = .true. if the quasi-triangular Schur factor
45* is being computed. WANTT is set to .false. otherwise.
46*
47* WANTZ (input) logical scalar
48* WANTZ = .true. if the orthogonal Schur factor is being
49* computed. WANTZ is set to .false. otherwise.
50*
51* KACC22 (input) integer with value 0, 1, or 2.
52* Specifies the computation mode of far-from-diagonal
53* orthogonal updates.
54* = 0: SLAQR6 does not accumulate reflections and does not
55* use matrix-matrix multiply to update far-from-diagonal
56* matrix entries.
57* = 1: SLAQR6 accumulates reflections and uses matrix-matrix
58* multiply to update the far-from-diagonal matrix entries.
59* = 2: SLAQR6 accumulates reflections, uses matrix-matrix
60* multiply to update the far-from-diagonal matrix entries,
61* and takes advantage of 2-by-2 block structure during
62* matrix multiplies.
63*
64* N (input) integer scalar
65* N is the order of the Hessenberg matrix H upon which this
66* subroutine operates.
67*
68* KTOP (input) integer scalar
69* KBOT (input) integer scalar
70* These are the first and last rows and columns of an
71* isolated diagonal block upon which the QR sweep is to be
72* applied. It is assumed without a check that
73* either KTOP = 1 or H(KTOP,KTOP-1) = 0
74* and
75* either KBOT = N or H(KBOT+1,KBOT) = 0.
76*
77* NSHFTS (input) integer scalar
78* NSHFTS gives the number of simultaneous shifts. NSHFTS
79* must be positive and even.
80*
81* SR (input) REAL array of size (NSHFTS)
82* SI (input) REAL array of size (NSHFTS)
83* SR contains the real parts and SI contains the imaginary
84* parts of the NSHFTS shifts of origin that define the
85* multi-shift QR sweep.
86*
87* H (input/output) REAL array of size (LDH,N)
88* On input H contains a Hessenberg matrix. On output a
89* multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
90* to the isolated diagonal block in rows and columns KTOP
91* through KBOT.
92*
93* LDH (input) integer scalar
94* LDH is the leading dimension of H just as declared in the
95* calling procedure. LDH.GE.MAX(1,N).
96*
97* ILOZ (input) INTEGER
98* IHIZ (input) INTEGER
99* Specify the rows of Z to which transformations must be
100* applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
101*
102* Z (input/output) REAL array of size (LDZ,IHI)
103* If WANTZ = .TRUE., then the QR Sweep orthogonal
104* similarity transformation is accumulated into
105* Z(ILOZ:IHIZ,ILO:IHI) from the right.
106* If WANTZ = .FALSE., then Z is unreferenced.
107*
108* LDZ (input) integer scalar
109* LDA is the leading dimension of Z just as declared in
110* the calling procedure. LDZ.GE.N.
111*
112* V (workspace) REAL array of size (LDV,NSHFTS/2)
113*
114* LDV (input) integer scalar
115* LDV is the leading dimension of V as declared in the
116* calling procedure. LDV.GE.3.
117*
118* U (workspace) REAL array of size
119* (LDU,3*NSHFTS-3)
120*
121* LDU (input) integer scalar
122* LDU is the leading dimension of U just as declared in the
123* in the calling subroutine. LDU.GE.3*NSHFTS-3.
124*
125* NH (input) integer scalar
126* NH is the number of columns in array WH available for
127* workspace. NH.GE.1 is required for usage of this
128* workspace, otherwise the updates of the far-from-diagonal
129* elements will be updated without level 3 BLAS.
130*
131* WH (workspace) REAL array of size (LDWH,NH)
132*
133* LDWH (input) integer scalar
134* Leading dimension of WH just as declared in the
135* calling procedure. LDWH.GE.3*NSHFTS-3.
136*
137* NV (input) integer scalar
138* NV is the number of rows in WV agailable for workspace.
139* NV.GE.1 is required for usage of this
140* workspace, otherwise the updates of the far-from-diagonal
141* elements will be updated without level 3 BLAS.
142*
143* WV (workspace) REAL array of size
144* (LDWV,3*NSHFTS-3)
145*
146* LDWV (input) integer scalar
147* LDWV is the leading dimension of WV as declared in the
148* in the calling subroutine. LDWV.GE.NV.
149*
150*
151* ================================================================
152* Based on contributions by
153* Karen Braman and Ralph Byers, Department of Mathematics,
154* University of Kansas, USA
155*
156* Robert Granat, Department of Computing Science and HPC2N,
157* Umea University, Sweden
158*
159* ============================================================
160* Reference:
161*
162* K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
163* Algorithm Part I: Maintaining Well Focused Shifts, and
164* Level 3 Performance, SIAM Journal of Matrix Analysis,
165* volume 23, pages 929--947, 2002.
166*
167* ============================================================
168* .. Parameters ..
169 REAL ZERO, ONE
170 PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
171* ..
172* .. Local Scalars ..
173 REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
174 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
175 $ ulp
176 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
177 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
178 $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
179 $ ns, nu, sincol, eincol, uincol, iphv, chunk,
180 $ threads, jlen2, jcol2, gchunk, jrow2, maxchunk
181 LOGICAL ACCUM, BLK22, BMP22, INTRO, CHASE, OFF, ALL
182* ..
183* .. External Functions ..
184 LOGICAL LSAME
185 INTEGER PILAENVX
186 REAL SLAMCH
187 EXTERNAL lsame, slamch, pilaenvx
188* ..
189* .. Intrinsic Functions ..
190*
191 INTRINSIC abs, float, max, min, mod
192* ..
193* .. Local Arrays ..
194 REAL VT( 3 )
195* ..
196* .. External Subroutines ..
197 EXTERNAL sgemm, slabad, slamov, slaqr1, slarfg, slaset,
198 $ strmm
199* ..
200* .. Executable Statements ..
201*
202* ==== If there are no shifts, then there is nothing to do. ====
203*
204 IF( nshfts.LT.2 )
205 $ RETURN
206*
207* ==== If the active block is empty or 1-by-1, then there
208* . is nothing to do. ====
209*
210 IF( ktop.GE.kbot )
211 $ RETURN
212 threads = 1
213*
214* ==== Shuffle shifts into pairs of real shifts and pairs
215* . of complex conjugate shifts assuming complex
216* . conjugate shifts are already adjacent to one
217* . another. ====
218*
219 DO 10 i = 1, nshfts - 2, 2
220 IF( si( i ).NE.-si( i+1 ) ) THEN
221*
222 swap = sr( i )
223 sr( i ) = sr( i+1 )
224 sr( i+1 ) = sr( i+2 )
225 sr( i+2 ) = swap
226*
227 swap = si( i )
228 si( i ) = si( i+1 )
229 si( i+1 ) = si( i+2 )
230 si( i+2 ) = swap
231 END IF
232 10 CONTINUE
233*
234* ==== NSHFTS is supposed to be even, but if it is odd,
235* . then simply reduce it by one. The shuffle above
236* . ensures that the dropped shift is real and that
237* . the remaining shifts are paired. ====
238*
239 ns = nshfts - mod( nshfts, 2 )
240*
241* ==== Machine constants for deflation ====
242*
243 safmin = slamch( 'SAFE MINIMUM' )
244 safmax = one / safmin
245 CALL slabad( safmin, safmax )
246 ulp = slamch( 'PRECISION' )
247 smlnum = safmin*( float( n ) / ulp )
248*
249* ==== Use accumulated reflections to update far-from-diagonal
250* . entries ? This is only performed if both NH and NV is
251* greater than 1. ====
252*
253 accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
254 accum = accum .AND. nh.GE.1 .AND. nv.GE.1
255*
256* ==== If so, exploit the 2-by-2 block structure? ====
257*
258 blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
259*
260* ==== Decode JOB ====
261*
262 all = lsame( job, 'A' )
263 IF( .NOT. all )
264 $ intro = lsame( job, 'I' )
265 IF( .NOT. all .AND. .NOT. intro )
266 $ chase = lsame( job, 'C' )
267 IF( .NOT. all .AND. .NOT. intro .AND. .NOT. chase ) THEN
268 off = lsame( job, 'O' )
269 IF( .NOT. off )
270 $ RETURN
271 END IF
272*
273* ==== clear trash ====
274*
275 IF( intro.OR.all .AND. ktop+2.LE.kbot )
276 $ h( ktop+2, ktop ) = zero
277*
278* ==== NBMPS = number of 2-shift bulges in the chain ====
279*
280 nbmps = ns / 2
281*
282* ==== KDU = width of slab ====
283*
284 kdu = 6*nbmps - 3
285*
286* Set loop limits for bulge-chasing depending on working mode
287*
288 IF( all ) THEN
289 sincol = 3*( 1-nbmps ) + ktop - 1
290 eincol = kbot - 2
291 uincol = 3*nbmps - 2
292 ELSEIF( intro ) THEN
293 sincol = 3*( 1-nbmps ) + ktop - 1
294 eincol = kbot - 3*nbmps - 1
295 uincol = 3*nbmps - 2
296 ELSEIF( chase ) THEN
297 sincol = ktop
298 eincol = kbot - 3*nbmps - 1
299 uincol = 3*nbmps - 2
300 ELSEIF( off ) THEN
301 sincol = ktop
302 eincol = kbot - 2
303 uincol = 3*nbmps - 2
304 END IF
305 iphv = 0
306*
307* ==== Create and/or chase chains of NBMPS bulges ====
308*
309 DO 220 incol = sincol, eincol, uincol
310 ndcol = min( incol + kdu, eincol )
311 IF( accum )
312 $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
313*
314* ==== Near-the-diagonal bulge chase. The following loop
315* . performs the near-the-diagonal part of a small bulge
316* . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
317* . chunk extends from column INCOL to column NDCOL
318* . (including both column INCOL and column NDCOL). The
319* . following loop chases a 3*NBMPS column long chain of
320* . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
321* . may be less than KTOP and and NDCOL may be greater than
322* . KBOT indicating phantom columns from which to chase
323* . bulges before they are actually introduced or to which
324* . to chase bulges beyond column KBOT.) ====
325*
326 DO 150 krcol = incol, min( eincol, incol+3*nbmps-3, kbot-2 )
327*
328* ==== Bulges number MTOP to MBOT are active double implicit
329* . shift bulges. There may or may not also be small
330* . 2-by-2 bulge, if there is room. The inactive bulges
331* . (if any) must wait until the active bulges have moved
332* . down the diagonal to make room. The phantom matrix
333* . paradigm described above helps keep track. ====
334*
335 mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
336 mbot = min( nbmps, ( kbot-krcol ) / 3 )
337 m22 = mbot + 1
338 bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
339 $ ( kbot-2 )
340*
341* ==== Generate reflections to chase the chain right
342* . one column. (The minimum value of K is KTOP-1.) ====
343*
344 DO 20 m = mtop, mbot
345 k = krcol + 3*( m-1 )
346 IF( k.EQ.ktop-1 ) THEN
347 CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
348 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
349 $ v( 1, m ) )
350 alpha = v( 1, m )
351 CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
352 ELSE
353 beta = h( k+1, k )
354 v( 2, m ) = h( k+2, k )
355 v( 3, m ) = h( k+3, k )
356 CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
357*
358* ==== A Bulge may collapse because of vigilant
359* . deflation or destructive underflow. In the
360* . underflow case, try the two-small-subdiagonals
361* . trick to try to reinflate the bulge. ====
362*
363 IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
364 $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
365*
366* ==== Typical case: not collapsed (yet). ====
367*
368 h( k+1, k ) = beta
369 h( k+2, k ) = zero
370 h( k+3, k ) = zero
371 ELSE
372*
373* ==== Atypical case: collapsed. Attempt to
374* . reintroduce ignoring H(K+1,K) and H(K+2,K).
375* . If the fill resulting from the new
376* . reflector is too large, then abandon it.
377* . Otherwise, use the new one. ====
378*
379 CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
380 $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
381 $ vt )
382 alpha = vt( 1 )
383 CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
384 refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
385 $ h( k+2, k ) )
386*
387 IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
388 $ abs( refsum*vt( 3 ) ).GT.ulp*
389 $ ( abs( h( k, k ) )+abs( h( k+1,
390 $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
391*
392* ==== Starting a new bulge here would
393* . create non-negligible fill. Use
394* . the old one with trepidation. ====
395*
396 h( k+1, k ) = beta
397 h( k+2, k ) = zero
398 h( k+3, k ) = zero
399 ELSE
400*
401* ==== Stating a new bulge here would
402* . create only negligible fill.
403* . Replace the old reflector with
404* . the new one. ====
405*
406 h( k+1, k ) = h( k+1, k ) - refsum
407 h( k+2, k ) = zero
408 h( k+3, k ) = zero
409 v( 1, m ) = vt( 1 )
410 v( 2, m ) = vt( 2 )
411 v( 3, m ) = vt( 3 )
412 END IF
413 END IF
414 END IF
415 20 CONTINUE
416*
417* ==== Generate a 2-by-2 reflection, if needed. ====
418*
419 k = krcol + 3*( m22-1 )
420 IF( bmp22 ) THEN
421 IF( k.EQ.ktop-1 ) THEN
422 CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
423 $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
424 $ v( 1, m22 ) )
425 beta = v( 1, m22 )
426 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
427 ELSE
428 beta = h( k+1, k )
429 v( 2, m22 ) = h( k+2, k )
430 CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
431 h( k+1, k ) = beta
432 h( k+2, k ) = zero
433 END IF
434 ELSE
435*
436* ==== Initialize V(1,M22) here to avoid possible undefined
437* . variable problems later. ====
438*
439 v( 1, m22 ) = zero
440 END IF
441*
442* ==== Multiply H by reflections from the left ====
443*
444 IF( accum ) THEN
445 jbot = min( max(incol+kdu,ndcol), kbot )
446 ELSE IF( wantt ) THEN
447 jbot = n
448 ELSE
449 jbot = kbot
450 END IF
451 DO 40 j = max( ktop, krcol ), jbot
452 mend = min( mbot, ( j-krcol+2 ) / 3 )
453 DO 30 m = mtop, mend
454 k = krcol + 3*( m-1 )
455 refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
456 $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
457 h( k+1, j ) = h( k+1, j ) - refsum
458 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
459 h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
460 30 CONTINUE
461 40 CONTINUE
462 IF( bmp22 ) THEN
463 k = krcol + 3*( m22-1 )
464 DO 50 j = max( k+1, ktop ), jbot
465 refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
466 $ h( k+2, j ) )
467 h( k+1, j ) = h( k+1, j ) - refsum
468 h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
469 50 CONTINUE
470 END IF
471*
472* ==== Multiply H by reflections from the right.
473* . Delay filling in the last row until the
474* . vigilant deflation check is complete. ====
475*
476 IF( accum ) THEN
477 jtop = max( ktop, incol )
478 ELSE IF( wantt ) THEN
479 jtop = 1
480 ELSE
481 jtop = ktop
482 END IF
483 DO 90 m = mtop, mbot
484 IF( v( 1, m ).NE.zero ) THEN
485 k = krcol + 3*( m-1 )
486 DO 60 j = jtop, min( kbot, k+3 )
487 refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
488 $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
489 h( j, k+1 ) = h( j, k+1 ) - refsum
490 h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
491 h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
492 60 CONTINUE
493*
494 IF( accum ) THEN
495*
496* ==== Accumulate U. (If necessary, update Z later
497* . with with an efficient matrix-matrix
498* . multiply.) ====
499*
500 kms = k - incol
501 DO 70 j = max( 1, ktop-incol ), kdu
502 refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
503 $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
504 u( j, kms+1 ) = u( j, kms+1 ) - refsum
505 u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
506 u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
507 70 CONTINUE
508 ELSE IF( wantz ) THEN
509*
510* ==== U is not accumulated, so update Z
511* . now by multiplying by reflections
512* . from the right. ====
513*
514 DO 80 j = iloz, ihiz
515 refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
516 $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
517 z( j, k+1 ) = z( j, k+1 ) - refsum
518 z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
519 z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
520 80 CONTINUE
521 END IF
522 END IF
523 90 CONTINUE
524*
525* ==== Special case: 2-by-2 reflection (if needed) ====
526*
527 k = krcol + 3*( m22-1 )
528 IF( bmp22 ) THEN
529 IF( v( 1, m22 ).NE.zero ) THEN
530 DO 100 j = jtop, min( kbot, k+3 )
531 refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
532 $ h( j, k+2 ) )
533 h( j, k+1 ) = h( j, k+1 ) - refsum
534 h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
535 100 CONTINUE
536*
537 IF( accum ) THEN
538 kms = k - incol
539 DO 110 j = max( 1, ktop-incol ), kdu
540 refsum = v( 1, m22 )*( u( j, kms+1 ) +
541 $ v( 2, m22 )*u( j, kms+2 ) )
542 u( j, kms+1 ) = u( j, kms+1 ) - refsum
543 u( j, kms+2 ) = u( j, kms+2 ) -
544 $ refsum*v( 2, m22 )
545 110 CONTINUE
546 ELSE IF( wantz ) THEN
547 DO 120 j = iloz, ihiz
548 refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
549 $ z( j, k+2 ) )
550 z( j, k+1 ) = z( j, k+1 ) - refsum
551 z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
552 120 CONTINUE
553 END IF
554 END IF
555 END IF
556*
557* ==== Vigilant deflation check ====
558*
559 mstart = mtop
560 IF( krcol+3*( mstart-1 ).LT.ktop )
561 $ mstart = mstart + 1
562 mend = mbot
563 IF( bmp22 )
564 $ mend = mend + 1
565 IF( krcol.EQ.kbot-2 )
566 $ mend = mend + 1
567 DO 130 m = mstart, mend
568 k = min( kbot-1, krcol+3*( m-1 ) )
569*
570* ==== The following convergence test requires that
571* . the tradition small-compared-to-nearby-diagonals
572* . criterion and the Ahues & Tisseur (LAWN 122, 1997)
573* . criteria both be satisfied. The latter improves
574* . accuracy in some examples. Falling back on an
575* . alternate convergence criterion when TST1 or TST2
576* . is zero (as done here) is traditional but probably
577* . unnecessary. ====
578*
579 IF( h( k+1, k ).NE.zero ) THEN
580 tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
581 IF( tst1.EQ.zero ) THEN
582 IF( k.GE.ktop+1 )
583 $ tst1 = tst1 + abs( h( k, k-1 ) )
584 IF( k.GE.ktop+2 )
585 $ tst1 = tst1 + abs( h( k, k-2 ) )
586 IF( k.GE.ktop+3 )
587 $ tst1 = tst1 + abs( h( k, k-3 ) )
588 IF( k.LE.kbot-2 )
589 $ tst1 = tst1 + abs( h( k+2, k+1 ) )
590 IF( k.LE.kbot-3 )
591 $ tst1 = tst1 + abs( h( k+3, k+1 ) )
592 IF( k.LE.kbot-4 )
593 $ tst1 = tst1 + abs( h( k+4, k+1 ) )
594 END IF
595 IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
596 $ THEN
597 h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
598 h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
599 h11 = max( abs( h( k+1, k+1 ) ),
600 $ abs( h( k, k )-h( k+1, k+1 ) ) )
601 h22 = min( abs( h( k+1, k+1 ) ),
602 $ abs( h( k, k )-h( k+1, k+1 ) ) )
603 scl = h11 + h12
604 tst2 = h22*( h11 / scl )
605*
606 IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
607 $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
608 END IF
609 END IF
610 130 CONTINUE
611*
612* ==== Fill in the last row of each bulge. ====
613*
614 mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
615 DO 140 m = mtop, mend
616 k = krcol + 3*( m-1 )
617 refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
618 h( k+4, k+1 ) = -refsum
619 h( k+4, k+2 ) = -refsum*v( 2, m )
620 h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
621 140 CONTINUE
622*
623* ==== End of near-the-diagonal bulge chase. ====
624*
625 150 CONTINUE
626*
627* ==== Use U (if accumulated) to update far-from-diagonal
628* . entries in H. If required, use U to update Z as
629* . well. ====
630*
631 IF( accum ) THEN
632 IF( wantt ) THEN
633 jtop = 1
634 jbot = n
635 ELSE
636 jtop = ktop
637 jbot = kbot
638 END IF
639 k1 = max( 1, ktop-incol )
640 nu = ( kdu-max( 0, max(incol+kdu,ndcol)-kbot ) ) - k1 + 1
641 IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
642 $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) .OR.
643 $ nu.LT.kdu ) THEN
644*
645* ==== Updates not exploiting the 2-by-2 block
646* . structure of U. K1 and NU keep track of
647* . the location and size of U in the special
648* . cases of introducing bulges and chasing
649* . bulges off the bottom. In these special
650* . cases and in case the number of shifts
651* . is NS = 2, there is no 2-by-2 block
652* . structure to exploit. ====
653*
654* ==== Horizontal Multiply ====
655*
656 DO 160 jcol = min(max(incol+kdu,ndcol),kbot)+ 1, jbot, nh
657 jlen = min( nh, jbot-jcol+1 )
658 CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
659 $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
660 $ ldwh )
661 CALL slamov( 'ALL', nu, jlen, wh, ldwh,
662 $ h( incol+k1, jcol ), ldh )
663 160 CONTINUE
664*
665* ==== Vertical multiply ====
666*
667 DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
668 jlen = min( nv, max( ktop, incol )-jrow )
669 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
670 $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
671 $ ldu, zero, wv, ldwv )
672 CALL slamov( 'ALL', jlen, nu, wv, ldwv,
673 $ h( jrow, incol+k1 ), ldh )
674 170 CONTINUE
675*
676* ==== Z multiply (also vertical) ====
677*
678 IF( wantz ) THEN
679 DO 180 jrow = iloz, ihiz, nv
680 jlen = min( nv, ihiz-jrow+1 )
681 CALL sgemm( 'N', 'N', jlen, nu, nu, one,
682 $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
683 $ ldu, zero, wv, ldwv )
684 CALL slamov( 'ALL', jlen, nu, wv, ldwv,
685 $ z( jrow, incol+k1 ), ldz )
686 180 CONTINUE
687 END IF
688 ELSE
689*
690* ==== Updates exploiting U's 2-by-2 block structure.
691* . (I2, I4, J2, J4 are the last rows and columns
692* . of the blocks.) ====
693*
694 i2 = ( kdu+1 ) / 2
695 i4 = kdu
696 j2 = i4 - i2
697 j4 = kdu
698*
699* ==== KZS and KNZ deal with the band of zeros
700* . along the diagonal of one of the triangular
701* . blocks. ====
702*
703 kzs = ( j4-j2 ) - ( ns+1 )
704 knz = ns + 1
705*
706* ==== Horizontal multiply ====
707*
708 DO 190 jcol = min(max(incol+kdu,ndcol),kbot)+ 1, jbot, nh
709 jlen = min( nh, jbot-jcol+1 )
710*
711* ==== Copy bottom of H to top+KZS of scratch ====
712* (The first KZS rows get multiplied by zero.) ====
713*
714 CALL slamov( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
715 $ ldh, wh( kzs+1, 1 ), ldwh )
716 CALL slaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
717*
718* ==== Multiply by U21' ====
719*
720 CALL strmm( 'L', 'U', 'C', 'N', knz, jlen, one,
721 $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
722 $ ldwh )
723*
724* ==== Multiply top of H by U11' ====
725*
726 CALL sgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
727 $ h( incol+1, jcol ), ldh, one, wh, ldwh )
728*
729* ==== Copy top of H to bottom of WH ====
730*
731 CALL slamov( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
732 $ wh( i2+1, 1 ), ldwh )
733*
734* ==== Multiply by U21' ====
735*
736 CALL strmm( 'L', 'L', 'C', 'N', j2, jlen, one,
737 $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
738*
739* ==== Multiply by U22 ====
740*
741 CALL sgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
742 $ u( j2+1, i2+1 ), ldu,
743 $ h( incol+1+j2, jcol ), ldh, one,
744 $ wh( i2+1, 1 ), ldwh )
745*
746* ==== Copy it back ====
747*
748 CALL slamov( 'ALL', kdu, jlen, wh, ldwh,
749 $ h( incol+1, jcol ), ldh )
750 190 CONTINUE
751*
752* ==== Vertical multiply ====
753*
754 DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
755 jlen = min( nv, max( incol, ktop )-jrow )
756*
757* ==== Copy right of H to scratch (the first KZS
758* . columns get multiplied by zero) ====
759*
760 CALL slamov( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
761 $ ldh, wv( 1, 1+kzs ), ldwv )
762 CALL slaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
763*
764* ==== Multiply by U21 ====
765*
766 CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
767 $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
768 $ ldwv )
769*
770* ==== Multiply by U11 ====
771*
772 CALL sgemm( 'N', 'N', jlen, i2, j2, one,
773 $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
774 $ ldwv )
775*
776* ==== Copy left of H to right of scratch ====
777*
778 CALL slamov( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
779 $ wv( 1, 1+i2 ), ldwv )
780*
781* ==== Multiply by U21 ====
782*
783 CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
784 $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
785*
786* ==== Multiply by U22 ====
787*
788 CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
789 $ h( jrow, incol+1+j2 ), ldh,
790 $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
791 $ ldwv )
792*
793* ==== Copy it back ====
794*
795 CALL slamov( 'ALL', jlen, kdu, wv, ldwv,
796 $ h( jrow, incol+1 ), ldh )
797 200 CONTINUE
798*
799* ==== Multiply Z (also vertical) ====
800*
801 IF( wantz ) THEN
802 DO 210 jrow = iloz, ihiz, nv
803 jlen = min( nv, ihiz-jrow+1 )
804*
805* ==== Copy right of Z to left of scratch (first
806* . KZS columns get multiplied by zero) ====
807*
808 CALL slamov( 'ALL', jlen, knz,
809 $ z( jrow, incol+1+j2 ), ldz,
810 $ wv( 1, 1+kzs ), ldwv )
811*
812* ==== Multiply by U12 ====
813*
814 CALL slaset( 'ALL', jlen, kzs, zero, zero, wv,
815 $ ldwv )
816 CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
817 $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
818 $ ldwv )
819*
820* ==== Multiply by U11 ====
821*
822 CALL sgemm( 'N', 'N', jlen, i2, j2, one,
823 $ z( jrow, incol+1 ), ldz, u, ldu, one,
824 $ wv, ldwv )
825*
826* ==== Copy left of Z to right of scratch ====
827*
828 CALL slamov( 'ALL', jlen, j2, z( jrow, incol+1 ),
829 $ ldz, wv( 1, 1+i2 ), ldwv )
830*
831* ==== Multiply by U21 ====
832*
833 CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
834 $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
835 $ ldwv )
836*
837* ==== Multiply by U22 ====
838*
839 CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
840 $ z( jrow, incol+1+j2 ), ldz,
841 $ u( j2+1, i2+1 ), ldu, one,
842 $ wv( 1, 1+i2 ), ldwv )
843*
844* ==== Copy the result back to Z ====
845*
846 CALL slamov( 'ALL', jlen, kdu, wv, ldwv,
847 $ z( jrow, incol+1 ), ldz )
848 210 CONTINUE
849 END IF
850 END IF
851 END IF
852 220 CONTINUE
853*
854* ==== Clear out workspaces and return. ====
855*
856 IF( n.GE.5 )
857 $ CALL slaset( 'Lower', n-4, n-4, zero, zero, h(5,1), ldh )
858*
859* ==== End of SLAQR6 ====
860*
861 END
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine slaqr6(job, wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
Definition slaqr6.f:4