ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
slaqr6.f
Go to the documentation of this file.
1  SUBROUTINE slaqr6( JOB, WANTT, WANTZ, KACC22, N, KTOP, KBOT,
2  $ NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ,
3  $ V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH )
4 *
5 * Contribution from the Department of Computing Science and HPC2N,
6 * Umea University, Sweden
7 *
8 * -- ScaLAPACK auxiliary routine (version 2.0.2) --
9 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10 * May 1 2012
11 *
12  IMPLICIT NONE
13 *
14 * .. Scalar Arguments ..
15  CHARACTER JOB
16  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
17  $ ldwh, ldwv, ldz, n, nh, nshfts, nv
18  LOGICAL WANTT, WANTZ
19 * ..
20 * .. Array Arguments ..
21  REAL H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
22  $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
23  $ z( ldz, * )
24 * ..
25 *
26 * This auxiliary subroutine called by PSLAQR5 performs a
27 * single small-bulge multi-shift QR sweep, moving the chain
28 * of bulges from top to bottom in the submatrix
29 * H(KTOP:KBOT,KTOP:KBOT), collecting the transformations in the
30 * matrix HV *or* accumulating the transformations in the matrix
31 * Z (see below).
32 *
33 * This is a modified version of DLAQR5 from LAPACK 3.1.
34 *
35 * ======================================================================
36 *
37 * JOB (input) character scalar
38 * Set the kind of job to do in SLAQR6, as follows:
39 * JOB = 'I': Introduce and chase bulges in submatrix
40 * JOB = 'C': Chase bulges from top to bottom of submatrix
41 * JOB = 'O': Chase bulges off submatrix
42 *
43 * WANTT (input) logical scalar
44 * WANTT = .true. if the quasi-triangular Schur factor
45 * is being computed. WANTT is set to .false. otherwise.
46 *
47 * WANTZ (input) logical scalar
48 * WANTZ = .true. if the orthogonal Schur factor is being
49 * computed. WANTZ is set to .false. otherwise.
50 *
51 * KACC22 (input) integer with value 0, 1, or 2.
52 * Specifies the computation mode of far-from-diagonal
53 * orthogonal updates.
54 * = 0: SLAQR6 does not accumulate reflections and does not
55 * use matrix-matrix multiply to update far-from-diagonal
56 * matrix entries.
57 * = 1: SLAQR6 accumulates reflections and uses matrix-matrix
58 * multiply to update the far-from-diagonal matrix entries.
59 * = 2: SLAQR6 accumulates reflections, uses matrix-matrix
60 * multiply to update the far-from-diagonal matrix entries,
61 * and takes advantage of 2-by-2 block structure during
62 * matrix multiplies.
63 *
64 * N (input) integer scalar
65 * N is the order of the Hessenberg matrix H upon which this
66 * subroutine operates.
67 *
68 * KTOP (input) integer scalar
69 * KBOT (input) integer scalar
70 * These are the first and last rows and columns of an
71 * isolated diagonal block upon which the QR sweep is to be
72 * applied. It is assumed without a check that
73 * either KTOP = 1 or H(KTOP,KTOP-1) = 0
74 * and
75 * either KBOT = N or H(KBOT+1,KBOT) = 0.
76 *
77 * NSHFTS (input) integer scalar
78 * NSHFTS gives the number of simultaneous shifts. NSHFTS
79 * must be positive and even.
80 *
81 * SR (input) REAL array of size (NSHFTS)
82 * SI (input) REAL array of size (NSHFTS)
83 * SR contains the real parts and SI contains the imaginary
84 * parts of the NSHFTS shifts of origin that define the
85 * multi-shift QR sweep.
86 *
87 * H (input/output) REAL array of size (LDH,N)
88 * On input H contains a Hessenberg matrix. On output a
89 * multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
90 * to the isolated diagonal block in rows and columns KTOP
91 * through KBOT.
92 *
93 * LDH (input) integer scalar
94 * LDH is the leading dimension of H just as declared in the
95 * calling procedure. LDH.GE.MAX(1,N).
96 *
97 * ILOZ (input) INTEGER
98 * IHIZ (input) INTEGER
99 * Specify the rows of Z to which transformations must be
100 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
101 *
102 * Z (input/output) REAL array of size (LDZ,IHI)
103 * If WANTZ = .TRUE., then the QR Sweep orthogonal
104 * similarity transformation is accumulated into
105 * Z(ILOZ:IHIZ,ILO:IHI) from the right.
106 * If WANTZ = .FALSE., then Z is unreferenced.
107 *
108 * LDZ (input) integer scalar
109 * LDA is the leading dimension of Z just as declared in
110 * the calling procedure. LDZ.GE.N.
111 *
112 * V (workspace) REAL array of size (LDV,NSHFTS/2)
113 *
114 * LDV (input) integer scalar
115 * LDV is the leading dimension of V as declared in the
116 * calling procedure. LDV.GE.3.
117 *
118 * U (workspace) REAL array of size
119 * (LDU,3*NSHFTS-3)
120 *
121 * LDU (input) integer scalar
122 * LDU is the leading dimension of U just as declared in the
123 * in the calling subroutine. LDU.GE.3*NSHFTS-3.
124 *
125 * NH (input) integer scalar
126 * NH is the number of columns in array WH available for
127 * workspace. NH.GE.1 is required for usage of this
128 * workspace, otherwise the updates of the far-from-diagonal
129 * elements will be updated without level 3 BLAS.
130 *
131 * WH (workspace) REAL array of size (LDWH,NH)
132 *
133 * LDWH (input) integer scalar
134 * Leading dimension of WH just as declared in the
135 * calling procedure. LDWH.GE.3*NSHFTS-3.
136 *
137 * NV (input) integer scalar
138 * NV is the number of rows in WV agailable for workspace.
139 * NV.GE.1 is required for usage of this
140 * workspace, otherwise the updates of the far-from-diagonal
141 * elements will be updated without level 3 BLAS.
142 *
143 * WV (workspace) REAL array of size
144 * (LDWV,3*NSHFTS-3)
145 *
146 * LDWV (input) integer scalar
147 * LDWV is the leading dimension of WV as declared in the
148 * in the calling subroutine. LDWV.GE.NV.
149 *
150 *
151 * ================================================================
152 * Based on contributions by
153 * Karen Braman and Ralph Byers, Department of Mathematics,
154 * University of Kansas, USA
155 *
156 * Robert Granat, Department of Computing Science and HPC2N,
157 * Umea University, Sweden
158 *
159 * ============================================================
160 * Reference:
161 *
162 * K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
163 * Algorithm Part I: Maintaining Well Focused Shifts, and
164 * Level 3 Performance, SIAM Journal of Matrix Analysis,
165 * volume 23, pages 929--947, 2002.
166 *
167 * ============================================================
168 * .. Parameters ..
169  REAL ZERO, ONE
170  PARAMETER ( ZERO = 0.0e0, one = 1.0e0 )
171 * ..
172 * .. Local Scalars ..
173  REAL ALPHA, BETA, H11, H12, H21, H22, REFSUM,
174  $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
175  $ ulp
176  INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
177  $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
178  $ m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,
179  $ ns, nu, sincol, eincol, uincol, iphv, chunk,
180  $ threads, jlen2, jcol2, gchunk, jrow2, maxchunk
181  LOGICAL ACCUM, BLK22, BMP22, INTRO, CHASE, OFF, ALL
182 * ..
183 * .. External Functions ..
184  LOGICAL LSAME
185  INTEGER PILAENVX
186  REAL SLAMCH
187  EXTERNAL lsame, slamch, pilaenvx
188 * ..
189 * .. Intrinsic Functions ..
190 *
191  INTRINSIC abs, float, max, min, mod
192 * ..
193 * .. Local Arrays ..
194  REAL VT( 3 )
195 * ..
196 * .. External Subroutines ..
197  EXTERNAL sgemm, slabad, slamov, slaqr1, slarfg, slaset,
198  $ strmm
199 * ..
200 * .. Executable Statements ..
201 *
202 * ==== If there are no shifts, then there is nothing to do. ====
203 *
204  IF( nshfts.LT.2 )
205  $ RETURN
206 *
207 * ==== If the active block is empty or 1-by-1, then there
208 * . is nothing to do. ====
209 *
210  IF( ktop.GE.kbot )
211  $ RETURN
212  threads = 1
213 *
214 * ==== Shuffle shifts into pairs of real shifts and pairs
215 * . of complex conjugate shifts assuming complex
216 * . conjugate shifts are already adjacent to one
217 * . another. ====
218 *
219  DO 10 i = 1, nshfts - 2, 2
220  IF( si( i ).NE.-si( i+1 ) ) THEN
221 *
222  swap = sr( i )
223  sr( i ) = sr( i+1 )
224  sr( i+1 ) = sr( i+2 )
225  sr( i+2 ) = swap
226 *
227  swap = si( i )
228  si( i ) = si( i+1 )
229  si( i+1 ) = si( i+2 )
230  si( i+2 ) = swap
231  END IF
232  10 CONTINUE
233 *
234 * ==== NSHFTS is supposed to be even, but if it is odd,
235 * . then simply reduce it by one. The shuffle above
236 * . ensures that the dropped shift is real and that
237 * . the remaining shifts are paired. ====
238 *
239  ns = nshfts - mod( nshfts, 2 )
240 *
241 * ==== Machine constants for deflation ====
242 *
243  safmin = slamch( 'SAFE MINIMUM' )
244  safmax = one / safmin
245  CALL slabad( safmin, safmax )
246  ulp = slamch( 'PRECISION' )
247  smlnum = safmin*( float( n ) / ulp )
248 *
249 * ==== Use accumulated reflections to update far-from-diagonal
250 * . entries ? This is only performed if both NH and NV is
251 * greater than 1. ====
252 *
253  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
254  accum = accum .AND. nh.GE.1 .AND. nv.GE.1
255 *
256 * ==== If so, exploit the 2-by-2 block structure? ====
257 *
258  blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )
259 *
260 * ==== Decode JOB ====
261 *
262  all = lsame( job, 'A' )
263  IF( .NOT. all )
264  $ intro = lsame( job, 'I' )
265  IF( .NOT. all .AND. .NOT. intro )
266  $ chase = lsame( job, 'C' )
267  IF( .NOT. all .AND. .NOT. intro .AND. .NOT. chase ) THEN
268  off = lsame( job, 'O' )
269  IF( .NOT. off )
270  $ RETURN
271  END IF
272 *
273 * ==== clear trash ====
274 *
275  IF( intro.OR.all .AND. ktop+2.LE.kbot )
276  $ h( ktop+2, ktop ) = zero
277 *
278 * ==== NBMPS = number of 2-shift bulges in the chain ====
279 *
280  nbmps = ns / 2
281 *
282 * ==== KDU = width of slab ====
283 *
284  kdu = 6*nbmps - 3
285 *
286 * Set loop limits for bulge-chasing depending on working mode
287 *
288  IF( all ) THEN
289  sincol = 3*( 1-nbmps ) + ktop - 1
290  eincol = kbot - 2
291  uincol = 3*nbmps - 2
292  ELSEIF( intro ) THEN
293  sincol = 3*( 1-nbmps ) + ktop - 1
294  eincol = kbot - 3*nbmps - 1
295  uincol = 3*nbmps - 2
296  ELSEIF( chase ) THEN
297  sincol = ktop
298  eincol = kbot - 3*nbmps - 1
299  uincol = 3*nbmps - 2
300  ELSEIF( off ) THEN
301  sincol = ktop
302  eincol = kbot - 2
303  uincol = 3*nbmps - 2
304  END IF
305  iphv = 0
306 *
307 * ==== Create and/or chase chains of NBMPS bulges ====
308 *
309  DO 220 incol = sincol, eincol, uincol
310  ndcol = min( incol + kdu, eincol )
311  IF( accum )
312  $ CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )
313 *
314 * ==== Near-the-diagonal bulge chase. The following loop
315 * . performs the near-the-diagonal part of a small bulge
316 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
317 * . chunk extends from column INCOL to column NDCOL
318 * . (including both column INCOL and column NDCOL). The
319 * . following loop chases a 3*NBMPS column long chain of
320 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
321 * . may be less than KTOP and and NDCOL may be greater than
322 * . KBOT indicating phantom columns from which to chase
323 * . bulges before they are actually introduced or to which
324 * . to chase bulges beyond column KBOT.) ====
325 *
326  DO 150 krcol = incol, min( eincol, incol+3*nbmps-3, kbot-2 )
327 *
328 * ==== Bulges number MTOP to MBOT are active double implicit
329 * . shift bulges. There may or may not also be small
330 * . 2-by-2 bulge, if there is room. The inactive bulges
331 * . (if any) must wait until the active bulges have moved
332 * . down the diagonal to make room. The phantom matrix
333 * . paradigm described above helps keep track. ====
334 *
335  mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )
336  mbot = min( nbmps, ( kbot-krcol ) / 3 )
337  m22 = mbot + 1
338  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.
339  $ ( kbot-2 )
340 *
341 * ==== Generate reflections to chase the chain right
342 * . one column. (The minimum value of K is KTOP-1.) ====
343 *
344  DO 20 m = mtop, mbot
345  k = krcol + 3*( m-1 )
346  IF( k.EQ.ktop-1 ) THEN
347  CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
348  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
349  $ v( 1, m ) )
350  alpha = v( 1, m )
351  CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
352  ELSE
353  beta = h( k+1, k )
354  v( 2, m ) = h( k+2, k )
355  v( 3, m ) = h( k+3, k )
356  CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
357 *
358 * ==== A Bulge may collapse because of vigilant
359 * . deflation or destructive underflow. In the
360 * . underflow case, try the two-small-subdiagonals
361 * . trick to try to reinflate the bulge. ====
362 *
363  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
364  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
365 *
366 * ==== Typical case: not collapsed (yet). ====
367 *
368  h( k+1, k ) = beta
369  h( k+2, k ) = zero
370  h( k+3, k ) = zero
371  ELSE
372 *
373 * ==== Atypical case: collapsed. Attempt to
374 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
375 * . If the fill resulting from the new
376 * . reflector is too large, then abandon it.
377 * . Otherwise, use the new one. ====
378 *
379  CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
380  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
381  $ vt )
382  alpha = vt( 1 )
383  CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
384  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
385  $ h( k+2, k ) )
386 *
387  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
388  $ abs( refsum*vt( 3 ) ).GT.ulp*
389  $ ( abs( h( k, k ) )+abs( h( k+1,
390  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
391 *
392 * ==== Starting a new bulge here would
393 * . create non-negligible fill. Use
394 * . the old one with trepidation. ====
395 *
396  h( k+1, k ) = beta
397  h( k+2, k ) = zero
398  h( k+3, k ) = zero
399  ELSE
400 *
401 * ==== Stating a new bulge here would
402 * . create only negligible fill.
403 * . Replace the old reflector with
404 * . the new one. ====
405 *
406  h( k+1, k ) = h( k+1, k ) - refsum
407  h( k+2, k ) = zero
408  h( k+3, k ) = zero
409  v( 1, m ) = vt( 1 )
410  v( 2, m ) = vt( 2 )
411  v( 3, m ) = vt( 3 )
412  END IF
413  END IF
414  END IF
415  20 CONTINUE
416 *
417 * ==== Generate a 2-by-2 reflection, if needed. ====
418 *
419  k = krcol + 3*( m22-1 )
420  IF( bmp22 ) THEN
421  IF( k.EQ.ktop-1 ) THEN
422  CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
423  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
424  $ v( 1, m22 ) )
425  beta = v( 1, m22 )
426  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
427  ELSE
428  beta = h( k+1, k )
429  v( 2, m22 ) = h( k+2, k )
430  CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
431  h( k+1, k ) = beta
432  h( k+2, k ) = zero
433  END IF
434  ELSE
435 *
436 * ==== Initialize V(1,M22) here to avoid possible undefined
437 * . variable problems later. ====
438 *
439  v( 1, m22 ) = zero
440  END IF
441 *
442 * ==== Multiply H by reflections from the left ====
443 *
444  IF( accum ) THEN
445  jbot = min( max(incol+kdu,ndcol), kbot )
446  ELSE IF( wantt ) THEN
447  jbot = n
448  ELSE
449  jbot = kbot
450  END IF
451  DO 40 j = max( ktop, krcol ), jbot
452  mend = min( mbot, ( j-krcol+2 ) / 3 )
453  DO 30 m = mtop, mend
454  k = krcol + 3*( m-1 )
455  refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*
456  $ h( k+2, j )+v( 3, m )*h( k+3, j ) )
457  h( k+1, j ) = h( k+1, j ) - refsum
458  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )
459  h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )
460  30 CONTINUE
461  40 CONTINUE
462  IF( bmp22 ) THEN
463  k = krcol + 3*( m22-1 )
464  DO 50 j = max( k+1, ktop ), jbot
465  refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*
466  $ h( k+2, j ) )
467  h( k+1, j ) = h( k+1, j ) - refsum
468  h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )
469  50 CONTINUE
470  END IF
471 *
472 * ==== Multiply H by reflections from the right.
473 * . Delay filling in the last row until the
474 * . vigilant deflation check is complete. ====
475 *
476  IF( accum ) THEN
477  jtop = max( ktop, incol )
478  ELSE IF( wantt ) THEN
479  jtop = 1
480  ELSE
481  jtop = ktop
482  END IF
483  DO 90 m = mtop, mbot
484  IF( v( 1, m ).NE.zero ) THEN
485  k = krcol + 3*( m-1 )
486  DO 60 j = jtop, min( kbot, k+3 )
487  refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*
488  $ h( j, k+2 )+v( 3, m )*h( j, k+3 ) )
489  h( j, k+1 ) = h( j, k+1 ) - refsum
490  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )
491  h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )
492  60 CONTINUE
493 *
494  IF( accum ) THEN
495 *
496 * ==== Accumulate U. (If necessary, update Z later
497 * . with with an efficient matrix-matrix
498 * . multiply.) ====
499 *
500  kms = k - incol
501  DO 70 j = max( 1, ktop-incol ), kdu
502  refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*
503  $ u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )
504  u( j, kms+1 ) = u( j, kms+1 ) - refsum
505  u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )
506  u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )
507  70 CONTINUE
508  ELSE IF( wantz ) THEN
509 *
510 * ==== U is not accumulated, so update Z
511 * . now by multiplying by reflections
512 * . from the right. ====
513 *
514  DO 80 j = iloz, ihiz
515  refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*
516  $ z( j, k+2 )+v( 3, m )*z( j, k+3 ) )
517  z( j, k+1 ) = z( j, k+1 ) - refsum
518  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )
519  z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )
520  80 CONTINUE
521  END IF
522  END IF
523  90 CONTINUE
524 *
525 * ==== Special case: 2-by-2 reflection (if needed) ====
526 *
527  k = krcol + 3*( m22-1 )
528  IF( bmp22 ) THEN
529  IF( v( 1, m22 ).NE.zero ) THEN
530  DO 100 j = jtop, min( kbot, k+3 )
531  refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*
532  $ h( j, k+2 ) )
533  h( j, k+1 ) = h( j, k+1 ) - refsum
534  h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )
535  100 CONTINUE
536 *
537  IF( accum ) THEN
538  kms = k - incol
539  DO 110 j = max( 1, ktop-incol ), kdu
540  refsum = v( 1, m22 )*( u( j, kms+1 ) +
541  $ v( 2, m22 )*u( j, kms+2 ) )
542  u( j, kms+1 ) = u( j, kms+1 ) - refsum
543  u( j, kms+2 ) = u( j, kms+2 ) -
544  $ refsum*v( 2, m22 )
545  110 CONTINUE
546  ELSE IF( wantz ) THEN
547  DO 120 j = iloz, ihiz
548  refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*
549  $ z( j, k+2 ) )
550  z( j, k+1 ) = z( j, k+1 ) - refsum
551  z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )
552  120 CONTINUE
553  END IF
554  END IF
555  END IF
556 *
557 * ==== Vigilant deflation check ====
558 *
559  mstart = mtop
560  IF( krcol+3*( mstart-1 ).LT.ktop )
561  $ mstart = mstart + 1
562  mend = mbot
563  IF( bmp22 )
564  $ mend = mend + 1
565  IF( krcol.EQ.kbot-2 )
566  $ mend = mend + 1
567  DO 130 m = mstart, mend
568  k = min( kbot-1, krcol+3*( m-1 ) )
569 *
570 * ==== The following convergence test requires that
571 * . the tradition small-compared-to-nearby-diagonals
572 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
573 * . criteria both be satisfied. The latter improves
574 * . accuracy in some examples. Falling back on an
575 * . alternate convergence criterion when TST1 or TST2
576 * . is zero (as done here) is traditional but probably
577 * . unnecessary. ====
578 *
579  IF( h( k+1, k ).NE.zero ) THEN
580  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
581  IF( tst1.EQ.zero ) THEN
582  IF( k.GE.ktop+1 )
583  $ tst1 = tst1 + abs( h( k, k-1 ) )
584  IF( k.GE.ktop+2 )
585  $ tst1 = tst1 + abs( h( k, k-2 ) )
586  IF( k.GE.ktop+3 )
587  $ tst1 = tst1 + abs( h( k, k-3 ) )
588  IF( k.LE.kbot-2 )
589  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
590  IF( k.LE.kbot-3 )
591  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
592  IF( k.LE.kbot-4 )
593  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
594  END IF
595  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
596  $ THEN
597  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
598  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
599  h11 = max( abs( h( k+1, k+1 ) ),
600  $ abs( h( k, k )-h( k+1, k+1 ) ) )
601  h22 = min( abs( h( k+1, k+1 ) ),
602  $ abs( h( k, k )-h( k+1, k+1 ) ) )
603  scl = h11 + h12
604  tst2 = h22*( h11 / scl )
605 *
606  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
607  $ max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero
608  END IF
609  END IF
610  130 CONTINUE
611 *
612 * ==== Fill in the last row of each bulge. ====
613 *
614  mend = min( nbmps, ( kbot-krcol-1 ) / 3 )
615  DO 140 m = mtop, mend
616  k = krcol + 3*( m-1 )
617  refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )
618  h( k+4, k+1 ) = -refsum
619  h( k+4, k+2 ) = -refsum*v( 2, m )
620  h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )
621  140 CONTINUE
622 *
623 * ==== End of near-the-diagonal bulge chase. ====
624 *
625  150 CONTINUE
626 *
627 * ==== Use U (if accumulated) to update far-from-diagonal
628 * . entries in H. If required, use U to update Z as
629 * . well. ====
630 *
631  IF( accum ) THEN
632  IF( wantt ) THEN
633  jtop = 1
634  jbot = n
635  ELSE
636  jtop = ktop
637  jbot = kbot
638  END IF
639  k1 = max( 1, ktop-incol )
640  nu = ( kdu-max( 0, max(incol+kdu,ndcol)-kbot ) ) - k1 + 1
641  IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.
642  $ ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) .OR.
643  $ nu.LT.kdu ) THEN
644 *
645 * ==== Updates not exploiting the 2-by-2 block
646 * . structure of U. K1 and NU keep track of
647 * . the location and size of U in the special
648 * . cases of introducing bulges and chasing
649 * . bulges off the bottom. In these special
650 * . cases and in case the number of shifts
651 * . is NS = 2, there is no 2-by-2 block
652 * . structure to exploit. ====
653 *
654 * ==== Horizontal Multiply ====
655 *
656  DO 160 jcol = min(max(incol+kdu,ndcol),kbot)+ 1, jbot, nh
657  jlen = min( nh, jbot-jcol+1 )
658  CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
659  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
660  $ ldwh )
661  CALL slamov( 'ALL', nu, jlen, wh, ldwh,
662  $ h( incol+k1, jcol ), ldh )
663  160 CONTINUE
664 *
665 * ==== Vertical multiply ====
666 *
667  DO 170 jrow = jtop, max( ktop, incol ) - 1, nv
668  jlen = min( nv, max( ktop, incol )-jrow )
669  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
670  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
671  $ ldu, zero, wv, ldwv )
672  CALL slamov( 'ALL', jlen, nu, wv, ldwv,
673  $ h( jrow, incol+k1 ), ldh )
674  170 CONTINUE
675 *
676 * ==== Z multiply (also vertical) ====
677 *
678  IF( wantz ) THEN
679  DO 180 jrow = iloz, ihiz, nv
680  jlen = min( nv, ihiz-jrow+1 )
681  CALL sgemm( 'N', 'N', jlen, nu, nu, one,
682  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
683  $ ldu, zero, wv, ldwv )
684  CALL slamov( 'ALL', jlen, nu, wv, ldwv,
685  $ z( jrow, incol+k1 ), ldz )
686  180 CONTINUE
687  END IF
688  ELSE
689 *
690 * ==== Updates exploiting U's 2-by-2 block structure.
691 * . (I2, I4, J2, J4 are the last rows and columns
692 * . of the blocks.) ====
693 *
694  i2 = ( kdu+1 ) / 2
695  i4 = kdu
696  j2 = i4 - i2
697  j4 = kdu
698 *
699 * ==== KZS and KNZ deal with the band of zeros
700 * . along the diagonal of one of the triangular
701 * . blocks. ====
702 *
703  kzs = ( j4-j2 ) - ( ns+1 )
704  knz = ns + 1
705 *
706 * ==== Horizontal multiply ====
707 *
708  DO 190 jcol = min(max(incol+kdu,ndcol),kbot)+ 1, jbot, nh
709  jlen = min( nh, jbot-jcol+1 )
710 *
711 * ==== Copy bottom of H to top+KZS of scratch ====
712 * (The first KZS rows get multiplied by zero.) ====
713 *
714  CALL slamov( 'ALL', knz, jlen, h( incol+1+j2, jcol ),
715  $ ldh, wh( kzs+1, 1 ), ldwh )
716  CALL slaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )
717 *
718 * ==== Multiply by U21' ====
719 *
720  CALL strmm( 'L', 'U', 'C', 'N', knz, jlen, one,
721  $ u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),
722  $ ldwh )
723 *
724 * ==== Multiply top of H by U11' ====
725 *
726  CALL sgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,
727  $ h( incol+1, jcol ), ldh, one, wh, ldwh )
728 *
729 * ==== Copy top of H to bottom of WH ====
730 *
731  CALL slamov( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,
732  $ wh( i2+1, 1 ), ldwh )
733 *
734 * ==== Multiply by U21' ====
735 *
736  CALL strmm( 'L', 'L', 'C', 'N', j2, jlen, one,
737  $ u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )
738 *
739 * ==== Multiply by U22 ====
740 *
741  CALL sgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,
742  $ u( j2+1, i2+1 ), ldu,
743  $ h( incol+1+j2, jcol ), ldh, one,
744  $ wh( i2+1, 1 ), ldwh )
745 *
746 * ==== Copy it back ====
747 *
748  CALL slamov( 'ALL', kdu, jlen, wh, ldwh,
749  $ h( incol+1, jcol ), ldh )
750  190 CONTINUE
751 *
752 * ==== Vertical multiply ====
753 *
754  DO 200 jrow = jtop, max( incol, ktop ) - 1, nv
755  jlen = min( nv, max( incol, ktop )-jrow )
756 *
757 * ==== Copy right of H to scratch (the first KZS
758 * . columns get multiplied by zero) ====
759 *
760  CALL slamov( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),
761  $ ldh, wv( 1, 1+kzs ), ldwv )
762  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )
763 *
764 * ==== Multiply by U21 ====
765 *
766  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
767  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
768  $ ldwv )
769 *
770 * ==== Multiply by U11 ====
771 *
772  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
773  $ h( jrow, incol+1 ), ldh, u, ldu, one, wv,
774  $ ldwv )
775 *
776 * ==== Copy left of H to right of scratch ====
777 *
778  CALL slamov( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,
779  $ wv( 1, 1+i2 ), ldwv )
780 *
781 * ==== Multiply by U21 ====
782 *
783  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
784  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )
785 *
786 * ==== Multiply by U22 ====
787 *
788  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
789  $ h( jrow, incol+1+j2 ), ldh,
790  $ u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),
791  $ ldwv )
792 *
793 * ==== Copy it back ====
794 *
795  CALL slamov( 'ALL', jlen, kdu, wv, ldwv,
796  $ h( jrow, incol+1 ), ldh )
797  200 CONTINUE
798 *
799 * ==== Multiply Z (also vertical) ====
800 *
801  IF( wantz ) THEN
802  DO 210 jrow = iloz, ihiz, nv
803  jlen = min( nv, ihiz-jrow+1 )
804 *
805 * ==== Copy right of Z to left of scratch (first
806 * . KZS columns get multiplied by zero) ====
807 *
808  CALL slamov( 'ALL', jlen, knz,
809  $ z( jrow, incol+1+j2 ), ldz,
810  $ wv( 1, 1+kzs ), ldwv )
811 *
812 * ==== Multiply by U12 ====
813 *
814  CALL slaset( 'ALL', jlen, kzs, zero, zero, wv,
815  $ ldwv )
816  CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,
817  $ u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),
818  $ ldwv )
819 *
820 * ==== Multiply by U11 ====
821 *
822  CALL sgemm( 'N', 'N', jlen, i2, j2, one,
823  $ z( jrow, incol+1 ), ldz, u, ldu, one,
824  $ wv, ldwv )
825 *
826 * ==== Copy left of Z to right of scratch ====
827 *
828  CALL slamov( 'ALL', jlen, j2, z( jrow, incol+1 ),
829  $ ldz, wv( 1, 1+i2 ), ldwv )
830 *
831 * ==== Multiply by U21 ====
832 *
833  CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,
834  $ u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),
835  $ ldwv )
836 *
837 * ==== Multiply by U22 ====
838 *
839  CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,
840  $ z( jrow, incol+1+j2 ), ldz,
841  $ u( j2+1, i2+1 ), ldu, one,
842  $ wv( 1, 1+i2 ), ldwv )
843 *
844 * ==== Copy the result back to Z ====
845 *
846  CALL slamov( 'ALL', jlen, kdu, wv, ldwv,
847  $ z( jrow, incol+1 ), ldz )
848  210 CONTINUE
849  END IF
850  END IF
851  END IF
852  220 CONTINUE
853 *
854 * ==== Clear out workspaces and return. ====
855 *
856  IF( n.GE.5 )
857  $ CALL slaset( 'Lower', n-4, n-4, zero, zero, h(5,1), ldh )
858 *
859 * ==== End of SLAQR6 ====
860 *
861  END
max
#define max(A, B)
Definition: pcgemr.c:180
slaqr6
subroutine slaqr6(JOB, WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
Definition: slaqr6.f:4
min
#define min(A, B)
Definition: pcgemr.c:181