001:       SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
002:      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
003:      $                   LDT, NV, WV, LDWV, WORK, LWORK )
004: *
005: *  -- LAPACK auxiliary routine (version 3.2.1)                        --
006: *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
007: *  -- April 2009                                                      --
008: *
009: *     .. Scalar Arguments ..
010:       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
011:      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
012:       LOGICAL            WANTT, WANTZ
013: *     ..
014: *     .. Array Arguments ..
015:       REAL               H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
016:      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),
017:      $                   Z( LDZ, * )
018: *     ..
019: *
020: *     ******************************************************************
021: *     Aggressive early deflation:
022: *
023: *     This subroutine accepts as input an upper Hessenberg matrix
024: *     H and performs an orthogonal similarity transformation
025: *     designed to detect and deflate fully converged eigenvalues from
026: *     a trailing principal submatrix.  On output H has been over-
027: *     written by a new Hessenberg matrix that is a perturbation of
028: *     an orthogonal similarity transformation of H.  It is to be
029: *     hoped that the final version of H has many zero subdiagonal
030: *     entries.
031: *
032: *     ******************************************************************
033: *     WANTT   (input) LOGICAL
034: *          If .TRUE., then the Hessenberg matrix H is fully updated
035: *          so that the quasi-triangular Schur factor may be
036: *          computed (in cooperation with the calling subroutine).
037: *          If .FALSE., then only enough of H is updated to preserve
038: *          the eigenvalues.
039: *
040: *     WANTZ   (input) LOGICAL
041: *          If .TRUE., then the orthogonal matrix Z is updated so
042: *          so that the orthogonal Schur factor may be computed
043: *          (in cooperation with the calling subroutine).
044: *          If .FALSE., then Z is not referenced.
045: *
046: *     N       (input) INTEGER
047: *          The order of the matrix H and (if WANTZ is .TRUE.) the
048: *          order of the orthogonal matrix Z.
049: *
050: *     KTOP    (input) INTEGER
051: *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
052: *          KBOT and KTOP together determine an isolated block
053: *          along the diagonal of the Hessenberg matrix.
054: *
055: *     KBOT    (input) INTEGER
056: *          It is assumed without a check that either
057: *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
058: *          determine an isolated block along the diagonal of the
059: *          Hessenberg matrix.
060: *
061: *     NW      (input) INTEGER
062: *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
063: *
064: *     H       (input/output) REAL array, dimension (LDH,N)
065: *          On input the initial N-by-N section of H stores the
066: *          Hessenberg matrix undergoing aggressive early deflation.
067: *          On output H has been transformed by an orthogonal
068: *          similarity transformation, perturbed, and the returned
069: *          to Hessenberg form that (it is to be hoped) has some
070: *          zero subdiagonal entries.
071: *
072: *     LDH     (input) integer
073: *          Leading dimension of H just as declared in the calling
074: *          subroutine.  N .LE. LDH
075: *
076: *     ILOZ    (input) INTEGER
077: *     IHIZ    (input) INTEGER
078: *          Specify the rows of Z to which transformations must be
079: *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
080: *
081: *     Z       (input/output) REAL array, dimension (LDZ,N)
082: *          IF WANTZ is .TRUE., then on output, the orthogonal
083: *          similarity transformation mentioned above has been
084: *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
085: *          If WANTZ is .FALSE., then Z is unreferenced.
086: *
087: *     LDZ     (input) integer
088: *          The leading dimension of Z just as declared in the
089: *          calling subroutine.  1 .LE. LDZ.
090: *
091: *     NS      (output) integer
092: *          The number of unconverged (ie approximate) eigenvalues
093: *          returned in SR and SI that may be used as shifts by the
094: *          calling subroutine.
095: *
096: *     ND      (output) integer
097: *          The number of converged eigenvalues uncovered by this
098: *          subroutine.
099: *
100: *     SR      (output) REAL array, dimension KBOT
101: *     SI      (output) REAL array, dimension KBOT
102: *          On output, the real and imaginary parts of approximate
103: *          eigenvalues that may be used for shifts are stored in
104: *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
105: *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
106: *          The real and imaginary parts of converged eigenvalues
107: *          are stored in SR(KBOT-ND+1) through SR(KBOT) and
108: *          SI(KBOT-ND+1) through SI(KBOT), respectively.
109: *
110: *     V       (workspace) REAL array, dimension (LDV,NW)
111: *          An NW-by-NW work array.
112: *
113: *     LDV     (input) integer scalar
114: *          The leading dimension of V just as declared in the
115: *          calling subroutine.  NW .LE. LDV
116: *
117: *     NH      (input) integer scalar
118: *          The number of columns of T.  NH.GE.NW.
119: *
120: *     T       (workspace) REAL array, dimension (LDT,NW)
121: *
122: *     LDT     (input) integer
123: *          The leading dimension of T just as declared in the
124: *          calling subroutine.  NW .LE. LDT
125: *
126: *     NV      (input) integer
127: *          The number of rows of work array WV available for
128: *          workspace.  NV.GE.NW.
129: *
130: *     WV      (workspace) REAL array, dimension (LDWV,NW)
131: *
132: *     LDWV    (input) integer
133: *          The leading dimension of W just as declared in the
134: *          calling subroutine.  NW .LE. LDV
135: *
136: *     WORK    (workspace) REAL array, dimension LWORK.
137: *          On exit, WORK(1) is set to an estimate of the optimal value
138: *          of LWORK for the given values of N, NW, KTOP and KBOT.
139: *
140: *     LWORK   (input) integer
141: *          The dimension of the work array WORK.  LWORK = 2*NW
142: *          suffices, but greater efficiency may result from larger
143: *          values of LWORK.
144: *
145: *          If LWORK = -1, then a workspace query is assumed; SLAQR3
146: *          only estimates the optimal workspace size for the given
147: *          values of N, NW, KTOP and KBOT.  The estimate is returned
148: *          in WORK(1).  No error message related to LWORK is issued
149: *          by XERBLA.  Neither H nor Z are accessed.
150: *
151: *     ================================================================
152: *     Based on contributions by
153: *        Karen Braman and Ralph Byers, Department of Mathematics,
154: *        University of Kansas, USA
155: *
156: *     ================================================================
157: *     .. Parameters ..
158:       REAL               ZERO, ONE
159:       PARAMETER          ( ZERO = 0.0e0, ONE = 1.0e0 )
160: *     ..
161: *     .. Local Scalars ..
162:       REAL               AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
163:      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
164:       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
165:      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
166:      $                   LWKOPT, NMIN
167:       LOGICAL            BULGE, SORTED
168: *     ..
169: *     .. External Functions ..
170:       REAL               SLAMCH
171:       INTEGER            ILAENV
172:       EXTERNAL           SLAMCH, ILAENV
173: *     ..
174: *     .. External Subroutines ..
175:       EXTERNAL           SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR,
176:      $                   SLANV2, SLAQR4, SLARF, SLARFG, SLASET, SORMHR,
177:      $                   STREXC
178: *     ..
179: *     .. Intrinsic Functions ..
180:       INTRINSIC          ABS, INT, MAX, MIN, REAL, SQRT
181: *     ..
182: *     .. Executable Statements ..
183: *
184: *     ==== Estimate optimal workspace. ====
185: *
186:       JW = MIN( NW, KBOT-KTOP+1 )
187:       IF( JW.LE.2 ) THEN
188:          LWKOPT = 1
189:       ELSE
190: *
191: *        ==== Workspace query call to SGEHRD ====
192: *
193:          CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
194:          LWK1 = INT( WORK( 1 ) )
195: *
196: *        ==== Workspace query call to SORMHR ====
197: *
198:          CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
199:      $                WORK, -1, INFO )
200:          LWK2 = INT( WORK( 1 ) )
201: *
202: *        ==== Workspace query call to SLAQR4 ====
203: *
204:          CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW,
205:      $                V, LDV, WORK, -1, INFQR )
206:          LWK3 = INT( WORK( 1 ) )
207: *
208: *        ==== Optimal workspace ====
209: *
210:          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
211:       END IF
212: *
213: *     ==== Quick return in case of workspace query. ====
214: *
215:       IF( LWORK.EQ.-1 ) THEN
216:          WORK( 1 ) = REAL( LWKOPT )
217:          RETURN
218:       END IF
219: *
220: *     ==== Nothing to do ...
221: *     ... for an empty active block ... ====
222:       NS = 0
223:       ND = 0
224:       WORK( 1 ) = ONE
225:       IF( KTOP.GT.KBOT )
226:      $   RETURN
227: *     ... nor for an empty deflation window. ====
228:       IF( NW.LT.1 )
229:      $   RETURN
230: *
231: *     ==== Machine constants ====
232: *
233:       SAFMIN = SLAMCH( 'SAFE MINIMUM' )
234:       SAFMAX = ONE / SAFMIN
235:       CALL SLABAD( SAFMIN, SAFMAX )
236:       ULP = SLAMCH( 'PRECISION' )
237:       SMLNUM = SAFMIN*( REAL( N ) / ULP )
238: *
239: *     ==== Setup deflation window ====
240: *
241:       JW = MIN( NW, KBOT-KTOP+1 )
242:       KWTOP = KBOT - JW + 1
243:       IF( KWTOP.EQ.KTOP ) THEN
244:          S = ZERO
245:       ELSE
246:          S = H( KWTOP, KWTOP-1 )
247:       END IF
248: *
249:       IF( KBOT.EQ.KWTOP ) THEN
250: *
251: *        ==== 1-by-1 deflation window: not much to do ====
252: *
253:          SR( KWTOP ) = H( KWTOP, KWTOP )
254:          SI( KWTOP ) = ZERO
255:          NS = 1
256:          ND = 0
257:          IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
258:      $        THEN
259:             NS = 0
260:             ND = 1
261:             IF( KWTOP.GT.KTOP )
262:      $         H( KWTOP, KWTOP-1 ) = ZERO
263:          END IF
264:          WORK( 1 ) = ONE
265:          RETURN
266:       END IF
267: *
268: *     ==== Convert to spike-triangular form.  (In case of a
269: *     .    rare QR failure, this routine continues to do
270: *     .    aggressive early deflation using that part of
271: *     .    the deflation window that converged using INFQR
272: *     .    here and there to keep track.) ====
273: *
274:       CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
275:       CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
276: *
277:       CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
278:       NMIN = ILAENV( 12, 'SLAQR3', 'SV', JW, 1, JW, LWORK )
279:       IF( JW.GT.NMIN ) THEN
280:          CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
281:      $                SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR )
282:       ELSE
283:          CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
284:      $                SI( KWTOP ), 1, JW, V, LDV, INFQR )
285:       END IF
286: *
287: *     ==== STREXC needs a clean margin near the diagonal ====
288: *
289:       DO 10 J = 1, JW - 3
290:          T( J+2, J ) = ZERO
291:          T( J+3, J ) = ZERO
292:    10 CONTINUE
293:       IF( JW.GT.2 )
294:      $   T( JW, JW-2 ) = ZERO
295: *
296: *     ==== Deflation detection loop ====
297: *
298:       NS = JW
299:       ILST = INFQR + 1
300:    20 CONTINUE
301:       IF( ILST.LE.NS ) THEN
302:          IF( NS.EQ.1 ) THEN
303:             BULGE = .FALSE.
304:          ELSE
305:             BULGE = T( NS, NS-1 ).NE.ZERO
306:          END IF
307: *
308: *        ==== Small spike tip test for deflation ====
309: *
310:          IF( .NOT.BULGE ) THEN
311: *
312: *           ==== Real eigenvalue ====
313: *
314:             FOO = ABS( T( NS, NS ) )
315:             IF( FOO.EQ.ZERO )
316:      $         FOO = ABS( S )
317:             IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
318: *
319: *              ==== Deflatable ====
320: *
321:                NS = NS - 1
322:             ELSE
323: *
324: *              ==== Undeflatable.   Move it up out of the way.
325: *              .    (STREXC can not fail in this case.) ====
326: *
327:                IFST = NS
328:                CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
329:      $                      INFO )
330:                ILST = ILST + 1
331:             END IF
332:          ELSE
333: *
334: *           ==== Complex conjugate pair ====
335: *
336:             FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
337:      $            SQRT( ABS( T( NS-1, NS ) ) )
338:             IF( FOO.EQ.ZERO )
339:      $         FOO = ABS( S )
340:             IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
341:      $          MAX( SMLNUM, ULP*FOO ) ) THEN
342: *
343: *              ==== Deflatable ====
344: *
345:                NS = NS - 2
346:             ELSE
347: *
348: *              ==== Undeflatable. Move them up out of the way.
349: *              .    Fortunately, STREXC does the right thing with
350: *              .    ILST in case of a rare exchange failure. ====
351: *
352:                IFST = NS
353:                CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
354:      $                      INFO )
355:                ILST = ILST + 2
356:             END IF
357:          END IF
358: *
359: *        ==== End deflation detection loop ====
360: *
361:          GO TO 20
362:       END IF
363: *
364: *        ==== Return to Hessenberg form ====
365: *
366:       IF( NS.EQ.0 )
367:      $   S = ZERO
368: *
369:       IF( NS.LT.JW ) THEN
370: *
371: *        ==== sorting diagonal blocks of T improves accuracy for
372: *        .    graded matrices.  Bubble sort deals well with
373: *        .    exchange failures. ====
374: *
375:          SORTED = .false.
376:          I = NS + 1
377:    30    CONTINUE
378:          IF( SORTED )
379:      $      GO TO 50
380:          SORTED = .true.
381: *
382:          KEND = I - 1
383:          I = INFQR + 1
384:          IF( I.EQ.NS ) THEN
385:             K = I + 1
386:          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
387:             K = I + 1
388:          ELSE
389:             K = I + 2
390:          END IF
391:    40    CONTINUE
392:          IF( K.LE.KEND ) THEN
393:             IF( K.EQ.I+1 ) THEN
394:                EVI = ABS( T( I, I ) )
395:             ELSE
396:                EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
397:      $               SQRT( ABS( T( I, I+1 ) ) )
398:             END IF
399: *
400:             IF( K.EQ.KEND ) THEN
401:                EVK = ABS( T( K, K ) )
402:             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
403:                EVK = ABS( T( K, K ) )
404:             ELSE
405:                EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
406:      $               SQRT( ABS( T( K, K+1 ) ) )
407:             END IF
408: *
409:             IF( EVI.GE.EVK ) THEN
410:                I = K
411:             ELSE
412:                SORTED = .false.
413:                IFST = I
414:                ILST = K
415:                CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
416:      $                      INFO )
417:                IF( INFO.EQ.0 ) THEN
418:                   I = ILST
419:                ELSE
420:                   I = K
421:                END IF
422:             END IF
423:             IF( I.EQ.KEND ) THEN
424:                K = I + 1
425:             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
426:                K = I + 1
427:             ELSE
428:                K = I + 2
429:             END IF
430:             GO TO 40
431:          END IF
432:          GO TO 30
433:    50    CONTINUE
434:       END IF
435: *
436: *     ==== Restore shift/eigenvalue array from T ====
437: *
438:       I = JW
439:    60 CONTINUE
440:       IF( I.GE.INFQR+1 ) THEN
441:          IF( I.EQ.INFQR+1 ) THEN
442:             SR( KWTOP+I-1 ) = T( I, I )
443:             SI( KWTOP+I-1 ) = ZERO
444:             I = I - 1
445:          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
446:             SR( KWTOP+I-1 ) = T( I, I )
447:             SI( KWTOP+I-1 ) = ZERO
448:             I = I - 1
449:          ELSE
450:             AA = T( I-1, I-1 )
451:             CC = T( I, I-1 )
452:             BB = T( I-1, I )
453:             DD = T( I, I )
454:             CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
455:      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
456:      $                   SI( KWTOP+I-1 ), CS, SN )
457:             I = I - 2
458:          END IF
459:          GO TO 60
460:       END IF
461: *
462:       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
463:          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
464: *
465: *           ==== Reflect spike back into lower triangle ====
466: *
467:             CALL SCOPY( NS, V, LDV, WORK, 1 )
468:             BETA = WORK( 1 )
469:             CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU )
470:             WORK( 1 ) = ONE
471: *
472:             CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
473: *
474:             CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
475:      $                  WORK( JW+1 ) )
476:             CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
477:      $                  WORK( JW+1 ) )
478:             CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
479:      $                  WORK( JW+1 ) )
480: *
481:             CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
482:      $                   LWORK-JW, INFO )
483:          END IF
484: *
485: *        ==== Copy updated reduced window into place ====
486: *
487:          IF( KWTOP.GT.1 )
488:      $      H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
489:          CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
490:          CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
491:      $               LDH+1 )
492: *
493: *        ==== Accumulate orthogonal matrix in order update
494: *        .    H and Z, if requested.  ====
495: *
496:          IF( NS.GT.1 .AND. S.NE.ZERO )
497:      $      CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
498:      $                   WORK( JW+1 ), LWORK-JW, INFO )
499: *
500: *        ==== Update vertical slab in H ====
501: *
502:          IF( WANTT ) THEN
503:             LTOP = 1
504:          ELSE
505:             LTOP = KTOP
506:          END IF
507:          DO 70 KROW = LTOP, KWTOP - 1, NV
508:             KLN = MIN( NV, KWTOP-KROW )
509:             CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
510:      $                  LDH, V, LDV, ZERO, WV, LDWV )
511:             CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
512:    70    CONTINUE
513: *
514: *        ==== Update horizontal slab in H ====
515: *
516:          IF( WANTT ) THEN
517:             DO 80 KCOL = KBOT + 1, N, NH
518:                KLN = MIN( NH, N-KCOL+1 )
519:                CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
520:      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
521:                CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
522:      $                      LDH )
523:    80       CONTINUE
524:          END IF
525: *
526: *        ==== Update vertical slab in Z ====
527: *
528:          IF( WANTZ ) THEN
529:             DO 90 KROW = ILOZ, IHIZ, NV
530:                KLN = MIN( NV, IHIZ-KROW+1 )
531:                CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
532:      $                     LDZ, V, LDV, ZERO, WV, LDWV )
533:                CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
534:      $                      LDZ )
535:    90       CONTINUE
536:          END IF
537:       END IF
538: *
539: *     ==== Return the number of deflations ... ====
540: *
541:       ND = JW - NS
542: *
543: *     ==== ... and the number of shifts. (Subtracting
544: *     .    INFQR from the spike length takes care
545: *     .    of the case of a rare QR failure while
546: *     .    calculating eigenvalues of the deflation
547: *     .    window.)  ====
548: *
549:       NS = NS - INFQR
550: *
551: *      ==== Return optimal workspace. ====
552: *
553:       WORK( 1 ) = REAL( LWKOPT )
554: *
555: *     ==== End of SLAQR3 ====
556: *
557:       END
558: