001:       SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
002:      $                   SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
003:      $                   LDU, NV, WV, LDWV, NH, WH, LDWH )
004: *
005: *  -- LAPACK auxiliary routine (version 3.2) --
006: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
007: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
008: *     November 2006
009: *
010: *     .. Scalar Arguments ..
011:       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
012:      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
013:       LOGICAL            WANTT, WANTZ
014: *     ..
015: *     .. Array Arguments ..
016:       REAL               H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
017:      $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
018:      $                   Z( LDZ, * )
019: *     ..
020: *
021: *     This auxiliary subroutine called by SLAQR0 performs a
022: *     single small-bulge multi-shift QR sweep.
023: *
024: *      WANTT  (input) logical scalar
025: *             WANTT = .true. if the quasi-triangular Schur factor
026: *             is being computed.  WANTT is set to .false. otherwise.
027: *
028: *      WANTZ  (input) logical scalar
029: *             WANTZ = .true. if the orthogonal Schur factor is being
030: *             computed.  WANTZ is set to .false. otherwise.
031: *
032: *      KACC22 (input) integer with value 0, 1, or 2.
033: *             Specifies the computation mode of far-from-diagonal
034: *             orthogonal updates.
035: *        = 0: SLAQR5 does not accumulate reflections and does not
036: *             use matrix-matrix multiply to update far-from-diagonal
037: *             matrix entries.
038: *        = 1: SLAQR5 accumulates reflections and uses matrix-matrix
039: *             multiply to update the far-from-diagonal matrix entries.
040: *        = 2: SLAQR5 accumulates reflections, uses matrix-matrix
041: *             multiply to update the far-from-diagonal matrix entries,
042: *             and takes advantage of 2-by-2 block structure during
043: *             matrix multiplies.
044: *
045: *      N      (input) integer scalar
046: *             N is the order of the Hessenberg matrix H upon which this
047: *             subroutine operates.
048: *
049: *      KTOP   (input) integer scalar
050: *      KBOT   (input) integer scalar
051: *             These are the first and last rows and columns of an
052: *             isolated diagonal block upon which the QR sweep is to be
053: *             applied. It is assumed without a check that
054: *                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
055: *             and
056: *                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
057: *
058: *      NSHFTS (input) integer scalar
059: *             NSHFTS gives the number of simultaneous shifts.  NSHFTS
060: *             must be positive and even.
061: *
062: *      SR     (input/output) REAL array of size (NSHFTS)
063: *      SI     (input/output) REAL array of size (NSHFTS)
064: *             SR contains the real parts and SI contains the imaginary
065: *             parts of the NSHFTS shifts of origin that define the
066: *             multi-shift QR sweep.  On output SR and SI may be
067: *             reordered.
068: *
069: *      H      (input/output) REAL array of size (LDH,N)
070: *             On input H contains a Hessenberg matrix.  On output a
071: *             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
072: *             to the isolated diagonal block in rows and columns KTOP
073: *             through KBOT.
074: *
075: *      LDH    (input) integer scalar
076: *             LDH is the leading dimension of H just as declared in the
077: *             calling procedure.  LDH.GE.MAX(1,N).
078: *
079: *      ILOZ   (input) INTEGER
080: *      IHIZ   (input) INTEGER
081: *             Specify the rows of Z to which transformations must be
082: *             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
083: *
084: *      Z      (input/output) REAL array of size (LDZ,IHI)
085: *             If WANTZ = .TRUE., then the QR Sweep orthogonal
086: *             similarity transformation is accumulated into
087: *             Z(ILOZ:IHIZ,ILO:IHI) from the right.
088: *             If WANTZ = .FALSE., then Z is unreferenced.
089: *
090: *      LDZ    (input) integer scalar
091: *             LDA is the leading dimension of Z just as declared in
092: *             the calling procedure. LDZ.GE.N.
093: *
094: *      V      (workspace) REAL array of size (LDV,NSHFTS/2)
095: *
096: *      LDV    (input) integer scalar
097: *             LDV is the leading dimension of V as declared in the
098: *             calling procedure.  LDV.GE.3.
099: *
100: *      U      (workspace) REAL array of size
101: *             (LDU,3*NSHFTS-3)
102: *
103: *      LDU    (input) integer scalar
104: *             LDU is the leading dimension of U just as declared in the
105: *             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
106: *
107: *      NH     (input) integer scalar
108: *             NH is the number of columns in array WH available for
109: *             workspace. NH.GE.1.
110: *
111: *      WH     (workspace) REAL array of size (LDWH,NH)
112: *
113: *      LDWH   (input) integer scalar
114: *             Leading dimension of WH just as declared in the
115: *             calling procedure.  LDWH.GE.3*NSHFTS-3.
116: *
117: *      NV     (input) integer scalar
118: *             NV is the number of rows in WV agailable for workspace.
119: *             NV.GE.1.
120: *
121: *      WV     (workspace) REAL array of size
122: *             (LDWV,3*NSHFTS-3)
123: *
124: *      LDWV   (input) integer scalar
125: *             LDWV is the leading dimension of WV as declared in the
126: *             in the calling subroutine.  LDWV.GE.NV.
127: *
128: *     ================================================================
129: *     Based on contributions by
130: *        Karen Braman and Ralph Byers, Department of Mathematics,
131: *        University of Kansas, USA
132: *
133: *     ================================================================
134: *     Reference:
135: *
136: *     K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
137: *     Algorithm Part I: Maintaining Well Focused Shifts, and
138: *     Level 3 Performance, SIAM Journal of Matrix Analysis,
139: *     volume 23, pages 929--947, 2002.
140: *
141: *     ================================================================
142: *     .. Parameters ..
143:       REAL               ZERO, ONE
144:       PARAMETER          ( ZERO = 0.0e0, ONE = 1.0e0 )
145: *     ..
146: *     .. Local Scalars ..
147:       REAL               ALPHA, BETA, H11, H12, H21, H22, REFSUM,
148:      $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
149:      $                   ULP
150:       INTEGER            I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
151:      $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
152:      $                   M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
153:      $                   NS, NU
154:       LOGICAL            ACCUM, BLK22, BMP22
155: *     ..
156: *     .. External Functions ..
157:       REAL               SLAMCH
158:       EXTERNAL           SLAMCH
159: *     ..
160: *     .. Intrinsic Functions ..
161: *
162:       INTRINSIC          ABS, MAX, MIN, MOD, REAL
163: *     ..
164: *     .. Local Arrays ..
165:       REAL               VT( 3 )
166: *     ..
167: *     .. External Subroutines ..
168:       EXTERNAL           SGEMM, SLABAD, SLACPY, SLAQR1, SLARFG, SLASET,
169:      $                   STRMM
170: *     ..
171: *     .. Executable Statements ..
172: *
173: *     ==== If there are no shifts, then there is nothing to do. ====
174: *
175:       IF( NSHFTS.LT.2 )
176:      $   RETURN
177: *
178: *     ==== If the active block is empty or 1-by-1, then there
179: *     .    is nothing to do. ====
180: *
181:       IF( KTOP.GE.KBOT )
182:      $   RETURN
183: *
184: *     ==== Shuffle shifts into pairs of real shifts and pairs
185: *     .    of complex conjugate shifts assuming complex
186: *     .    conjugate shifts are already adjacent to one
187: *     .    another. ====
188: *
189:       DO 10 I = 1, NSHFTS - 2, 2
190:          IF( SI( I ).NE.-SI( I+1 ) ) THEN
191: *
192:             SWAP = SR( I )
193:             SR( I ) = SR( I+1 )
194:             SR( I+1 ) = SR( I+2 )
195:             SR( I+2 ) = SWAP
196: *
197:             SWAP = SI( I )
198:             SI( I ) = SI( I+1 )
199:             SI( I+1 ) = SI( I+2 )
200:             SI( I+2 ) = SWAP
201:          END IF
202:    10 CONTINUE
203: *
204: *     ==== NSHFTS is supposed to be even, but if it is odd,
205: *     .    then simply reduce it by one.  The shuffle above
206: *     .    ensures that the dropped shift is real and that
207: *     .    the remaining shifts are paired. ====
208: *
209:       NS = NSHFTS - MOD( NSHFTS, 2 )
210: *
211: *     ==== Machine constants for deflation ====
212: *
213:       SAFMIN = SLAMCH( 'SAFE MINIMUM' )
214:       SAFMAX = ONE / SAFMIN
215:       CALL SLABAD( SAFMIN, SAFMAX )
216:       ULP = SLAMCH( 'PRECISION' )
217:       SMLNUM = SAFMIN*( REAL( N ) / ULP )
218: *
219: *     ==== Use accumulated reflections to update far-from-diagonal
220: *     .    entries ? ====
221: *
222:       ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
223: *
224: *     ==== If so, exploit the 2-by-2 block structure? ====
225: *
226:       BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
227: *
228: *     ==== clear trash ====
229: *
230:       IF( KTOP+2.LE.KBOT )
231:      $   H( KTOP+2, KTOP ) = ZERO
232: *
233: *     ==== NBMPS = number of 2-shift bulges in the chain ====
234: *
235:       NBMPS = NS / 2
236: *
237: *     ==== KDU = width of slab ====
238: *
239:       KDU = 6*NBMPS - 3
240: *
241: *     ==== Create and chase chains of NBMPS bulges ====
242: *
243:       DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
244:          NDCOL = INCOL + KDU
245:          IF( ACCUM )
246:      $      CALL SLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
247: *
248: *        ==== Near-the-diagonal bulge chase.  The following loop
249: *        .    performs the near-the-diagonal part of a small bulge
250: *        .    multi-shift QR sweep.  Each 6*NBMPS-2 column diagonal
251: *        .    chunk extends from column INCOL to column NDCOL
252: *        .    (including both column INCOL and column NDCOL). The
253: *        .    following loop chases a 3*NBMPS column long chain of
254: *        .    NBMPS bulges 3*NBMPS-2 columns to the right.  (INCOL
255: *        .    may be less than KTOP and and NDCOL may be greater than
256: *        .    KBOT indicating phantom columns from which to chase
257: *        .    bulges before they are actually introduced or to which
258: *        .    to chase bulges beyond column KBOT.)  ====
259: *
260:          DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
261: *
262: *           ==== Bulges number MTOP to MBOT are active double implicit
263: *           .    shift bulges.  There may or may not also be small
264: *           .    2-by-2 bulge, if there is room.  The inactive bulges
265: *           .    (if any) must wait until the active bulges have moved
266: *           .    down the diagonal to make room.  The phantom matrix
267: *           .    paradigm described above helps keep track.  ====
268: *
269:             MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
270:             MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
271:             M22 = MBOT + 1
272:             BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
273:      $              ( KBOT-2 )
274: *
275: *           ==== Generate reflections to chase the chain right
276: *           .    one column.  (The minimum value of K is KTOP-1.) ====
277: *
278:             DO 20 M = MTOP, MBOT
279:                K = KRCOL + 3*( M-1 )
280:                IF( K.EQ.KTOP-1 ) THEN
281:                   CALL SLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
282:      $                         SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
283:      $                         V( 1, M ) )
284:                   ALPHA = V( 1, M )
285:                   CALL SLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
286:                ELSE
287:                   BETA = H( K+1, K )
288:                   V( 2, M ) = H( K+2, K )
289:                   V( 3, M ) = H( K+3, K )
290:                   CALL SLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
291: *
292: *                 ==== A Bulge may collapse because of vigilant
293: *                 .    deflation or destructive underflow.  In the
294: *                 .    underflow case, try the two-small-subdiagonals
295: *                 .    trick to try to reinflate the bulge.  ====
296: *
297:                   IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
298:      $                ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
299: *
300: *                    ==== Typical case: not collapsed (yet). ====
301: *
302:                      H( K+1, K ) = BETA
303:                      H( K+2, K ) = ZERO
304:                      H( K+3, K ) = ZERO
305:                   ELSE
306: *
307: *                    ==== Atypical case: collapsed.  Attempt to
308: *                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
309: *                    .    If the fill resulting from the new
310: *                    .    reflector is too large, then abandon it.
311: *                    .    Otherwise, use the new one. ====
312: *
313:                      CALL SLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
314:      $                            SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
315:      $                            VT )
316:                      ALPHA = VT( 1 )
317:                      CALL SLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
318:                      REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
319:      $                        H( K+2, K ) )
320: *
321:                      IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
322:      $                   ABS( REFSUM*VT( 3 ) ).GT.ULP*
323:      $                   ( ABS( H( K, K ) )+ABS( H( K+1,
324:      $                   K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
325: *
326: *                       ==== Starting a new bulge here would
327: *                       .    create non-negligible fill.  Use
328: *                       .    the old one with trepidation. ====
329: *
330:                         H( K+1, K ) = BETA
331:                         H( K+2, K ) = ZERO
332:                         H( K+3, K ) = ZERO
333:                      ELSE
334: *
335: *                       ==== Stating a new bulge here would
336: *                       .    create only negligible fill.
337: *                       .    Replace the old reflector with
338: *                       .    the new one. ====
339: *
340:                         H( K+1, K ) = H( K+1, K ) - REFSUM
341:                         H( K+2, K ) = ZERO
342:                         H( K+3, K ) = ZERO
343:                         V( 1, M ) = VT( 1 )
344:                         V( 2, M ) = VT( 2 )
345:                         V( 3, M ) = VT( 3 )
346:                      END IF
347:                   END IF
348:                END IF
349:    20       CONTINUE
350: *
351: *           ==== Generate a 2-by-2 reflection, if needed. ====
352: *
353:             K = KRCOL + 3*( M22-1 )
354:             IF( BMP22 ) THEN
355:                IF( K.EQ.KTOP-1 ) THEN
356:                   CALL SLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
357:      $                         SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
358:      $                         V( 1, M22 ) )
359:                   BETA = V( 1, M22 )
360:                   CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
361:                ELSE
362:                   BETA = H( K+1, K )
363:                   V( 2, M22 ) = H( K+2, K )
364:                   CALL SLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
365:                   H( K+1, K ) = BETA
366:                   H( K+2, K ) = ZERO
367:                END IF
368:             END IF
369: *
370: *           ==== Multiply H by reflections from the left ====
371: *
372:             IF( ACCUM ) THEN
373:                JBOT = MIN( NDCOL, KBOT )
374:             ELSE IF( WANTT ) THEN
375:                JBOT = N
376:             ELSE
377:                JBOT = KBOT
378:             END IF
379:             DO 40 J = MAX( KTOP, KRCOL ), JBOT
380:                MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
381:                DO 30 M = MTOP, MEND
382:                   K = KRCOL + 3*( M-1 )
383:                   REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
384:      $                     H( K+2, J )+V( 3, M )*H( K+3, J ) )
385:                   H( K+1, J ) = H( K+1, J ) - REFSUM
386:                   H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
387:                   H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
388:    30          CONTINUE
389:    40       CONTINUE
390:             IF( BMP22 ) THEN
391:                K = KRCOL + 3*( M22-1 )
392:                DO 50 J = MAX( K+1, KTOP ), JBOT
393:                   REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
394:      $                     H( K+2, J ) )
395:                   H( K+1, J ) = H( K+1, J ) - REFSUM
396:                   H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
397:    50          CONTINUE
398:             END IF
399: *
400: *           ==== Multiply H by reflections from the right.
401: *           .    Delay filling in the last row until the
402: *           .    vigilant deflation check is complete. ====
403: *
404:             IF( ACCUM ) THEN
405:                JTOP = MAX( KTOP, INCOL )
406:             ELSE IF( WANTT ) THEN
407:                JTOP = 1
408:             ELSE
409:                JTOP = KTOP
410:             END IF
411:             DO 90 M = MTOP, MBOT
412:                IF( V( 1, M ).NE.ZERO ) THEN
413:                   K = KRCOL + 3*( M-1 )
414:                   DO 60 J = JTOP, MIN( KBOT, K+3 )
415:                      REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
416:      $                        H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
417:                      H( J, K+1 ) = H( J, K+1 ) - REFSUM
418:                      H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
419:                      H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
420:    60             CONTINUE
421: *
422:                   IF( ACCUM ) THEN
423: *
424: *                    ==== Accumulate U. (If necessary, update Z later
425: *                    .    with with an efficient matrix-matrix
426: *                    .    multiply.) ====
427: *
428:                      KMS = K - INCOL
429:                      DO 70 J = MAX( 1, KTOP-INCOL ), KDU
430:                         REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
431:      $                           U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
432:                         U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
433:                         U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
434:                         U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
435:    70                CONTINUE
436:                   ELSE IF( WANTZ ) THEN
437: *
438: *                    ==== U is not accumulated, so update Z
439: *                    .    now by multiplying by reflections
440: *                    .    from the right. ====
441: *
442:                      DO 80 J = ILOZ, IHIZ
443:                         REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
444:      $                           Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
445:                         Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
446:                         Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
447:                         Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
448:    80                CONTINUE
449:                   END IF
450:                END IF
451:    90       CONTINUE
452: *
453: *           ==== Special case: 2-by-2 reflection (if needed) ====
454: *
455:             K = KRCOL + 3*( M22-1 )
456:             IF( BMP22 .AND. ( V( 1, M22 ).NE.ZERO ) ) THEN
457:                DO 100 J = JTOP, MIN( KBOT, K+3 )
458:                   REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
459:      $                     H( J, K+2 ) )
460:                   H( J, K+1 ) = H( J, K+1 ) - REFSUM
461:                   H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
462:   100          CONTINUE
463: *
464:                IF( ACCUM ) THEN
465:                   KMS = K - INCOL
466:                   DO 110 J = MAX( 1, KTOP-INCOL ), KDU
467:                      REFSUM = V( 1, M22 )*( U( J, KMS+1 )+V( 2, M22 )*
468:      $                        U( J, KMS+2 ) )
469:                      U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
470:                      U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M22 )
471:   110             CONTINUE
472:                ELSE IF( WANTZ ) THEN
473:                   DO 120 J = ILOZ, IHIZ
474:                      REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
475:      $                        Z( J, K+2 ) )
476:                      Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
477:                      Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
478:   120             CONTINUE
479:                END IF
480:             END IF
481: *
482: *           ==== Vigilant deflation check ====
483: *
484:             MSTART = MTOP
485:             IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
486:      $         MSTART = MSTART + 1
487:             MEND = MBOT
488:             IF( BMP22 )
489:      $         MEND = MEND + 1
490:             IF( KRCOL.EQ.KBOT-2 )
491:      $         MEND = MEND + 1
492:             DO 130 M = MSTART, MEND
493:                K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
494: *
495: *              ==== The following convergence test requires that
496: *              .    the tradition small-compared-to-nearby-diagonals
497: *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
498: *              .    criteria both be satisfied.  The latter improves
499: *              .    accuracy in some examples. Falling back on an
500: *              .    alternate convergence criterion when TST1 or TST2
501: *              .    is zero (as done here) is traditional but probably
502: *              .    unnecessary. ====
503: *
504:                IF( H( K+1, K ).NE.ZERO ) THEN
505:                   TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
506:                   IF( TST1.EQ.ZERO ) THEN
507:                      IF( K.GE.KTOP+1 )
508:      $                  TST1 = TST1 + ABS( H( K, K-1 ) )
509:                      IF( K.GE.KTOP+2 )
510:      $                  TST1 = TST1 + ABS( H( K, K-2 ) )
511:                      IF( K.GE.KTOP+3 )
512:      $                  TST1 = TST1 + ABS( H( K, K-3 ) )
513:                      IF( K.LE.KBOT-2 )
514:      $                  TST1 = TST1 + ABS( H( K+2, K+1 ) )
515:                      IF( K.LE.KBOT-3 )
516:      $                  TST1 = TST1 + ABS( H( K+3, K+1 ) )
517:                      IF( K.LE.KBOT-4 )
518:      $                  TST1 = TST1 + ABS( H( K+4, K+1 ) )
519:                   END IF
520:                   IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
521:      $                 THEN
522:                      H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
523:                      H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
524:                      H11 = MAX( ABS( H( K+1, K+1 ) ),
525:      $                     ABS( H( K, K )-H( K+1, K+1 ) ) )
526:                      H22 = MIN( ABS( H( K+1, K+1 ) ),
527:      $                     ABS( H( K, K )-H( K+1, K+1 ) ) )
528:                      SCL = H11 + H12
529:                      TST2 = H22*( H11 / SCL )
530: *
531:                      IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
532:      $                   MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
533:                   END IF
534:                END IF
535:   130       CONTINUE
536: *
537: *           ==== Fill in the last row of each bulge. ====
538: *
539:             MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
540:             DO 140 M = MTOP, MEND
541:                K = KRCOL + 3*( M-1 )
542:                REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
543:                H( K+4, K+1 ) = -REFSUM
544:                H( K+4, K+2 ) = -REFSUM*V( 2, M )
545:                H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
546:   140       CONTINUE
547: *
548: *           ==== End of near-the-diagonal bulge chase. ====
549: *
550:   150    CONTINUE
551: *
552: *        ==== Use U (if accumulated) to update far-from-diagonal
553: *        .    entries in H.  If required, use U to update Z as
554: *        .    well. ====
555: *
556:          IF( ACCUM ) THEN
557:             IF( WANTT ) THEN
558:                JTOP = 1
559:                JBOT = N
560:             ELSE
561:                JTOP = KTOP
562:                JBOT = KBOT
563:             END IF
564:             IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
565:      $          ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
566: *
567: *              ==== Updates not exploiting the 2-by-2 block
568: *              .    structure of U.  K1 and NU keep track of
569: *              .    the location and size of U in the special
570: *              .    cases of introducing bulges and chasing
571: *              .    bulges off the bottom.  In these special
572: *              .    cases and in case the number of shifts
573: *              .    is NS = 2, there is no 2-by-2 block
574: *              .    structure to exploit.  ====
575: *
576:                K1 = MAX( 1, KTOP-INCOL )
577:                NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
578: *
579: *              ==== Horizontal Multiply ====
580: *
581:                DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
582:                   JLEN = MIN( NH, JBOT-JCOL+1 )
583:                   CALL SGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
584:      $                        LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
585:      $                        LDWH )
586:                   CALL SLACPY( 'ALL', NU, JLEN, WH, LDWH,
587:      $                         H( INCOL+K1, JCOL ), LDH )
588:   160          CONTINUE
589: *
590: *              ==== Vertical multiply ====
591: *
592:                DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
593:                   JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
594:                   CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE,
595:      $                        H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
596:      $                        LDU, ZERO, WV, LDWV )
597:                   CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV,
598:      $                         H( JROW, INCOL+K1 ), LDH )
599:   170          CONTINUE
600: *
601: *              ==== Z multiply (also vertical) ====
602: *
603:                IF( WANTZ ) THEN
604:                   DO 180 JROW = ILOZ, IHIZ, NV
605:                      JLEN = MIN( NV, IHIZ-JROW+1 )
606:                      CALL SGEMM( 'N', 'N', JLEN, NU, NU, ONE,
607:      $                           Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
608:      $                           LDU, ZERO, WV, LDWV )
609:                      CALL SLACPY( 'ALL', JLEN, NU, WV, LDWV,
610:      $                            Z( JROW, INCOL+K1 ), LDZ )
611:   180             CONTINUE
612:                END IF
613:             ELSE
614: *
615: *              ==== Updates exploiting U's 2-by-2 block structure.
616: *              .    (I2, I4, J2, J4 are the last rows and columns
617: *              .    of the blocks.) ====
618: *
619:                I2 = ( KDU+1 ) / 2
620:                I4 = KDU
621:                J2 = I4 - I2
622:                J4 = KDU
623: *
624: *              ==== KZS and KNZ deal with the band of zeros
625: *              .    along the diagonal of one of the triangular
626: *              .    blocks. ====
627: *
628:                KZS = ( J4-J2 ) - ( NS+1 )
629:                KNZ = NS + 1
630: *
631: *              ==== Horizontal multiply ====
632: *
633:                DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
634:                   JLEN = MIN( NH, JBOT-JCOL+1 )
635: *
636: *                 ==== Copy bottom of H to top+KZS of scratch ====
637: *                  (The first KZS rows get multiplied by zero.) ====
638: *
639:                   CALL SLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
640:      $                         LDH, WH( KZS+1, 1 ), LDWH )
641: *
642: *                 ==== Multiply by U21' ====
643: *
644:                   CALL SLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
645:                   CALL STRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
646:      $                        U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
647:      $                        LDWH )
648: *
649: *                 ==== Multiply top of H by U11' ====
650: *
651:                   CALL SGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
652:      $                        H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
653: *
654: *                 ==== Copy top of H to bottom of WH ====
655: *
656:                   CALL SLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
657:      $                         WH( I2+1, 1 ), LDWH )
658: *
659: *                 ==== Multiply by U21' ====
660: *
661:                   CALL STRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
662:      $                        U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
663: *
664: *                 ==== Multiply by U22 ====
665: *
666:                   CALL SGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
667:      $                        U( J2+1, I2+1 ), LDU,
668:      $                        H( INCOL+1+J2, JCOL ), LDH, ONE,
669:      $                        WH( I2+1, 1 ), LDWH )
670: *
671: *                 ==== Copy it back ====
672: *
673:                   CALL SLACPY( 'ALL', KDU, JLEN, WH, LDWH,
674:      $                         H( INCOL+1, JCOL ), LDH )
675:   190          CONTINUE
676: *
677: *              ==== Vertical multiply ====
678: *
679:                DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
680:                   JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
681: *
682: *                 ==== Copy right of H to scratch (the first KZS
683: *                 .    columns get multiplied by zero) ====
684: *
685:                   CALL SLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
686:      $                         LDH, WV( 1, 1+KZS ), LDWV )
687: *
688: *                 ==== Multiply by U21 ====
689: *
690:                   CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
691:                   CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
692:      $                        U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
693:      $                        LDWV )
694: *
695: *                 ==== Multiply by U11 ====
696: *
697:                   CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE,
698:      $                        H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
699:      $                        LDWV )
700: *
701: *                 ==== Copy left of H to right of scratch ====
702: *
703:                   CALL SLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
704:      $                         WV( 1, 1+I2 ), LDWV )
705: *
706: *                 ==== Multiply by U21 ====
707: *
708:                   CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
709:      $                        U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
710: *
711: *                 ==== Multiply by U22 ====
712: *
713:                   CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
714:      $                        H( JROW, INCOL+1+J2 ), LDH,
715:      $                        U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
716:      $                        LDWV )
717: *
718: *                 ==== Copy it back ====
719: *
720:                   CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV,
721:      $                         H( JROW, INCOL+1 ), LDH )
722:   200          CONTINUE
723: *
724: *              ==== Multiply Z (also vertical) ====
725: *
726:                IF( WANTZ ) THEN
727:                   DO 210 JROW = ILOZ, IHIZ, NV
728:                      JLEN = MIN( NV, IHIZ-JROW+1 )
729: *
730: *                    ==== Copy right of Z to left of scratch (first
731: *                    .     KZS columns get multiplied by zero) ====
732: *
733:                      CALL SLACPY( 'ALL', JLEN, KNZ,
734:      $                            Z( JROW, INCOL+1+J2 ), LDZ,
735:      $                            WV( 1, 1+KZS ), LDWV )
736: *
737: *                    ==== Multiply by U12 ====
738: *
739:                      CALL SLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
740:      $                            LDWV )
741:                      CALL STRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
742:      $                           U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
743:      $                           LDWV )
744: *
745: *                    ==== Multiply by U11 ====
746: *
747:                      CALL SGEMM( 'N', 'N', JLEN, I2, J2, ONE,
748:      $                           Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
749:      $                           WV, LDWV )
750: *
751: *                    ==== Copy left of Z to right of scratch ====
752: *
753:                      CALL SLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
754:      $                            LDZ, WV( 1, 1+I2 ), LDWV )
755: *
756: *                    ==== Multiply by U21 ====
757: *
758:                      CALL STRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
759:      $                           U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
760:      $                           LDWV )
761: *
762: *                    ==== Multiply by U22 ====
763: *
764:                      CALL SGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
765:      $                           Z( JROW, INCOL+1+J2 ), LDZ,
766:      $                           U( J2+1, I2+1 ), LDU, ONE,
767:      $                           WV( 1, 1+I2 ), LDWV )
768: *
769: *                    ==== Copy the result back to Z ====
770: *
771:                      CALL SLACPY( 'ALL', JLEN, KDU, WV, LDWV,
772:      $                            Z( JROW, INCOL+1 ), LDZ )
773:   210             CONTINUE
774:                END IF
775:             END IF
776:          END IF
777:   220 CONTINUE
778: *
779: *     ==== End of SLAQR5 ====
780: *
781:       END
782: