d4/d76/dlaqr2_8f_source.html

00001       SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
00002      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
00003      $                   LDT, NV, WV, LDWV, WORK, LWORK )
00004 *
00005 *  -- LAPACK auxiliary routine (version 3.2.2)                        --
00006 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.
00007 *  -- June 2010                                                       --
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
00011      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
00012       LOGICAL            WANTT, WANTZ
00013 *     ..
00014 *     .. Array Arguments ..
00015       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
00016      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),
00017      $                   Z( LDZ, * )
00018 *     ..
00019 *
00020 *     This subroutine is identical to DLAQR3 except that it avoids
00021 *     recursion by calling DLAHQR instead of DLAQR4.
00022 *
00023 *
00024 *     ******************************************************************
00025 *     Aggressive early deflation:
00026 *
00027 *     This subroutine accepts as input an upper Hessenberg matrix
00028 *     H and performs an orthogonal similarity transformation
00029 *     designed to detect and deflate fully converged eigenvalues from
00030 *     a trailing principal submatrix.  On output H has been over-
00031 *     written by a new Hessenberg matrix that is a perturbation of
00032 *     an orthogonal similarity transformation of H.  It is to be
00033 *     hoped that the final version of H has many zero subdiagonal
00034 *     entries.
00035 *
00036 *     ******************************************************************
00037 *     WANTT   (input) LOGICAL
00038 *          If .TRUE., then the Hessenberg matrix H is fully updated
00039 *          so that the quasi-triangular Schur factor may be
00040 *          computed (in cooperation with the calling subroutine).
00041 *          If .FALSE., then only enough of H is updated to preserve
00042 *          the eigenvalues.
00043 *
00044 *     WANTZ   (input) LOGICAL
00045 *          If .TRUE., then the orthogonal matrix Z is updated so
00046 *          that the orthogonal Schur factor may be computed
00047 *          (in cooperation with the calling subroutine).
00048 *          If .FALSE., then Z is not referenced.
00049 *
00050 *     N       (input) INTEGER
00051 *          The order of the matrix H and (if WANTZ is .TRUE.) the
00052 *          order of the orthogonal matrix Z.
00053 *
00054 *     KTOP    (input) INTEGER
00055 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
00056 *          KBOT and KTOP together determine an isolated block
00057 *          along the diagonal of the Hessenberg matrix.
00058 *
00059 *     KBOT    (input) INTEGER
00060 *          It is assumed without a check that either
00061 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
00062 *          determine an isolated block along the diagonal of the
00063 *          Hessenberg matrix.
00064 *
00065 *     NW      (input) INTEGER
00066 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
00067 *
00068 *     H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
00069 *          On input the initial N-by-N section of H stores the
00070 *          Hessenberg matrix undergoing aggressive early deflation.
00071 *          On output H has been transformed by an orthogonal
00072 *          similarity transformation, perturbed, and then returned
00073 *          to Hessenberg form that (it is to be hoped) has some
00074 *          zero subdiagonal entries.
00075 *
00076 *     LDH     (input) integer
00077 *          Leading dimension of H just as declared in the calling
00078 *          subroutine.  N .LE. LDH
00079 *
00080 *     ILOZ    (input) INTEGER
00081 *     IHIZ    (input) INTEGER
00082 *          Specify the rows of Z to which transformations must be
00083 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
00084 *
00085 *     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
00086 *          IF WANTZ is .TRUE., then on output, the orthogonal
00087 *          similarity transformation mentioned above has been
00088 *          accumulated into Z(ILOZ:IHIZ,KBOT-NW+1:KBOT) from the right.
00089 *          If WANTZ is .FALSE., then Z is unreferenced.
00090 *
00091 *     LDZ     (input) integer
00092 *          The leading dimension of Z just as declared in the
00093 *          calling subroutine.  1 .LE. LDZ.
00094 *
00095 *     NS      (output) integer
00096 *          The number of unconverged (ie approximate) eigenvalues
00097 *          returned in SR and SI that may be used as shifts by the
00098 *          calling subroutine.
00099 *
00100 *     ND      (output) integer
00101 *          The number of converged eigenvalues uncovered by this
00102 *          subroutine.
00103 *
00104 *     SR      (output) DOUBLE PRECISION array, dimension (KBOT)
00105 *     SI      (output) DOUBLE PRECISION array, dimension (KBOT)
00106 *          On output, the real and imaginary parts of approximate
00107 *          eigenvalues that may be used for shifts are stored in
00108 *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
00109 *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
00110 *          The real and imaginary parts of converged eigenvalues
00111 *          are stored in SR(KBOT-ND+1) through SR(KBOT) and
00112 *          SI(KBOT-ND+1) through SI(KBOT), respectively.
00113 *
00114 *     V       (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
00115 *          An NW-by-NW work array.
00116 *
00117 *     LDV     (input) integer scalar
00118 *          The leading dimension of V just as declared in the
00119 *          calling subroutine.  NW .LE. LDV
00120 *
00121 *     NH      (input) integer scalar
00122 *          The number of columns of T.  NH.GE.NW.
00123 *
00124 *     T       (workspace) DOUBLE PRECISION array, dimension (LDT,NH)
00125 *
00126 *     LDT     (input) integer
00127 *          The leading dimension of T just as declared in the
00128 *          calling subroutine.  NW .LE. LDT
00129 *
00130 *     NV      (input) integer
00131 *          The number of rows of work array WV available for
00132 *          workspace.  NV.GE.NW.
00133 *
00134 *     WV      (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
00135 *
00136 *     LDWV    (input) integer
00137 *          The leading dimension of WV just as declared in the
00138 *          calling subroutine.  NV .LE. LDWV
00139 *
00140 *     WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
00141 *          On exit, WORK(1) is set to an estimate of the optimal value
00142 *          of LWORK for the given values of N, NW, KTOP and KBOT.
00143 *
00144 *     LWORK   (input) integer
00145 *          The dimension of the work array WORK.  LWORK = 2*NW
00146 *          suffices, but greater efficiency may result from larger
00147 *          values of LWORK.
00148 *
00149 *          If LWORK = -1, then a workspace query is assumed; DLAQR2
00150 *          only estimates the optimal workspace size for the given
00151 *          values of N, NW, KTOP and KBOT.  The estimate is returned
00152 *          in WORK(1).  No error message related to LWORK is issued
00153 *          by XERBLA.  Neither H nor Z are accessed.
00154 *
00155 *     ================================================================
00156 *     Based on contributions by
00157 *        Karen Braman and Ralph Byers, Department of Mathematics,
00158 *        University of Kansas, USA
00159 *
00160 *     ================================================================
00161 *     .. Parameters ..
00162       DOUBLE PRECISION   ZERO, ONE
00163       PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )
00164 *     ..
00165 *     .. Local Scalars ..
00166       DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
00167      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
00168       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
00169      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
00170      $                   LWKOPT
00171       LOGICAL            BULGE, SORTED
00172 *     ..
00173 *     .. External Functions ..
00174       DOUBLE PRECISION   DLAMCH
00175       EXTERNAL           DLAMCH
00176 *     ..
00177 *     .. External Subroutines ..
00178       EXTERNAL           DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
00179      $                   DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC
00180 *     ..
00181 *     .. Intrinsic Functions ..
00182       INTRINSIC          ABS, DBLE, INT, MAX, MIN, SQRT
00183 *     ..
00184 *     .. Executable Statements ..
00185 *
00186 *     ==== Estimate optimal workspace. ====
00187 *
00188       JW = MIN( NW, KBOT-KTOP+1 )
00189       IF( JW.LE.2 ) THEN
00190          LWKOPT = 1
00191       ELSE
00192 *
00193 *        ==== Workspace query call to DGEHRD ====
00194 *
00195          CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
00196          LWK1 = INT( WORK( 1 ) )
00197 *
00198 *        ==== Workspace query call to DORMHR ====
00199 *
00200          CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
00201      $                WORK, -1, INFO )
00202          LWK2 = INT( WORK( 1 ) )
00203 *
00204 *        ==== Optimal workspace ====
00205 *
00206          LWKOPT = JW + MAX( LWK1, LWK2 )
00207       END IF
00208 *
00209 *     ==== Quick return in case of workspace query. ====
00210 *
00211       IF( LWORK.EQ.-1 ) THEN
00212          WORK( 1 ) = DBLE( LWKOPT )
00213          RETURN
00214       END IF
00215 *
00216 *     ==== Nothing to do ...
00217 *     ... for an empty active block ... ====
00218       NS = 0
00219       ND = 0
00220       WORK( 1 ) = ONE
00221       IF( KTOP.GT.KBOT )
00222      $   RETURN
00223 *     ... nor for an empty deflation window. ====
00224       IF( NW.LT.1 )
00225      $   RETURN
00226 *
00227 *     ==== Machine constants ====
00228 *
00229       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
00230       SAFMAX = ONE / SAFMIN
00231       CALL DLABAD( SAFMIN, SAFMAX )
00232       ULP = DLAMCH( 'PRECISION' )
00233       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
00234 *
00235 *     ==== Setup deflation window ====
00236 *
00237       JW = MIN( NW, KBOT-KTOP+1 )
00238       KWTOP = KBOT - JW + 1
00239       IF( KWTOP.EQ.KTOP ) THEN
00240          S = ZERO
00241       ELSE
00242          S = H( KWTOP, KWTOP-1 )
00243       END IF
00244 *
00245       IF( KBOT.EQ.KWTOP ) THEN
00246 *
00247 *        ==== 1-by-1 deflation window: not much to do ====
00248 *
00249          SR( KWTOP ) = H( KWTOP, KWTOP )
00250          SI( KWTOP ) = ZERO
00251          NS = 1
00252          ND = 0
00253          IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
00254      $        THEN
00255             NS = 0
00256             ND = 1
00257             IF( KWTOP.GT.KTOP )
00258      $         H( KWTOP, KWTOP-1 ) = ZERO
00259          END IF
00260          WORK( 1 ) = ONE
00261          RETURN
00262       END IF
00263 *
00264 *     ==== Convert to spike-triangular form.  (In case of a
00265 *     .    rare QR failure, this routine continues to do
00266 *     .    aggressive early deflation using that part of
00267 *     .    the deflation window that converged using INFQR
00268 *     .    here and there to keep track.) ====
00269 *
00270       CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
00271       CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
00272 *
00273       CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
00274       CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
00275      $             SI( KWTOP ), 1, JW, V, LDV, INFQR )
00276 *
00277 *     ==== DTREXC needs a clean margin near the diagonal ====
00278 *
00279       DO 10 J = 1, JW - 3
00280          T( J+2, J ) = ZERO
00281          T( J+3, J ) = ZERO
00282    10 CONTINUE
00283       IF( JW.GT.2 )
00284      $   T( JW, JW-2 ) = ZERO
00285 *
00286 *     ==== Deflation detection loop ====
00287 *
00288       NS = JW
00289       ILST = INFQR + 1
00290    20 CONTINUE
00291       IF( ILST.LE.NS ) THEN
00292          IF( NS.EQ.1 ) THEN
00293             BULGE = .FALSE.
00294          ELSE
00295             BULGE = T( NS, NS-1 ).NE.ZERO
00296          END IF
00297 *
00298 *        ==== Small spike tip test for deflation ====
00299 *
00300          IF( .NOT.BULGE ) THEN
00301 *
00302 *           ==== Real eigenvalue ====
00303 *
00304             FOO = ABS( T( NS, NS ) )
00305             IF( FOO.EQ.ZERO )
00306      $         FOO = ABS( S )
00307             IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
00308 *
00309 *              ==== Deflatable ====
00310 *
00311                NS = NS - 1
00312             ELSE
00313 *
00314 *              ==== Undeflatable.   Move it up out of the way.
00315 *              .    (DTREXC can not fail in this case.) ====
00316 *
00317                IFST = NS
00318                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
00319      $                      INFO )
00320                ILST = ILST + 1
00321             END IF
00322          ELSE
00323 *
00324 *           ==== Complex conjugate pair ====
00325 *
00326             FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
00327      $            SQRT( ABS( T( NS-1, NS ) ) )
00328             IF( FOO.EQ.ZERO )
00329      $         FOO = ABS( S )
00330             IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
00331      $          MAX( SMLNUM, ULP*FOO ) ) THEN
00332 *
00333 *              ==== Deflatable ====
00334 *
00335                NS = NS - 2
00336             ELSE
00337 *
00338 *              ==== Undeflatable. Move them up out of the way.
00339 *              .    Fortunately, DTREXC does the right thing with
00340 *              .    ILST in case of a rare exchange failure. ====
00341 *
00342                IFST = NS
00343                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
00344      $                      INFO )
00345                ILST = ILST + 2
00346             END IF
00347          END IF
00348 *
00349 *        ==== End deflation detection loop ====
00350 *
00351          GO TO 20
00352       END IF
00353 *
00354 *     ==== Return to Hessenberg form ====
00355 *
00356       IF( NS.EQ.0 )
00357      $   S = ZERO
00358 *
00359       IF( NS.LT.JW ) THEN
00360 *
00361 *        ==== sorting diagonal blocks of T improves accuracy for
00362 *        .    graded matrices.  Bubble sort deals well with
00363 *        .    exchange failures. ====
00364 *
00365          SORTED = .false.
00366          I = NS + 1
00367    30    CONTINUE
00368          IF( SORTED )
00369      $      GO TO 50
00370          SORTED = .true.
00371 *
00372          KEND = I - 1
00373          I = INFQR + 1
00374          IF( I.EQ.NS ) THEN
00375             K = I + 1
00376          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
00377             K = I + 1
00378          ELSE
00379             K = I + 2
00380          END IF
00381    40    CONTINUE
00382          IF( K.LE.KEND ) THEN
00383             IF( K.EQ.I+1 ) THEN
00384                EVI = ABS( T( I, I ) )
00385             ELSE
00386                EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
00387      $               SQRT( ABS( T( I, I+1 ) ) )
00388             END IF
00389 *
00390             IF( K.EQ.KEND ) THEN
00391                EVK = ABS( T( K, K ) )
00392             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
00393                EVK = ABS( T( K, K ) )
00394             ELSE
00395                EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
00396      $               SQRT( ABS( T( K, K+1 ) ) )
00397             END IF
00398 *
00399             IF( EVI.GE.EVK ) THEN
00400                I = K
00401             ELSE
00402                SORTED = .false.
00403                IFST = I
00404                ILST = K
00405                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
00406      $                      INFO )
00407                IF( INFO.EQ.0 ) THEN
00408                   I = ILST
00409                ELSE
00410                   I = K
00411                END IF
00412             END IF
00413             IF( I.EQ.KEND ) THEN
00414                K = I + 1
00415             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
00416                K = I + 1
00417             ELSE
00418                K = I + 2
00419             END IF
00420             GO TO 40
00421          END IF
00422          GO TO 30
00423    50    CONTINUE
00424       END IF
00425 *
00426 *     ==== Restore shift/eigenvalue array from T ====
00427 *
00428       I = JW
00429    60 CONTINUE
00430       IF( I.GE.INFQR+1 ) THEN
00431          IF( I.EQ.INFQR+1 ) THEN
00432             SR( KWTOP+I-1 ) = T( I, I )
00433             SI( KWTOP+I-1 ) = ZERO
00434             I = I - 1
00435          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
00436             SR( KWTOP+I-1 ) = T( I, I )
00437             SI( KWTOP+I-1 ) = ZERO
00438             I = I - 1
00439          ELSE
00440             AA = T( I-1, I-1 )
00441             CC = T( I, I-1 )
00442             BB = T( I-1, I )
00443             DD = T( I, I )
00444             CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
00445      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
00446      $                   SI( KWTOP+I-1 ), CS, SN )
00447             I = I - 2
00448          END IF
00449          GO TO 60
00450       END IF
00451 *     ==== Skip all updates if nothing deflated and S is nonzero ====
00452       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
00453          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
00454 *
00455 *           ==== Reflect spike back into lower triangle ====
00456 *
00457             CALL DCOPY( NS, V, LDV, WORK, 1 )
00458             BETA = WORK( 1 )
00459             CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
00460             WORK( 1 ) = ONE
00461 *
00462             CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
00463 *
00464             CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
00465      $                  WORK( JW+1 ) )
00466             CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
00467      $                  WORK( JW+1 ) )
00468             CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
00469      $                  WORK( JW+1 ) )
00470 *
00471             CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
00472      $                   LWORK-JW, INFO )
00473          END IF
00474 *
00475 *        ==== Copy updated reduced window into place ====
00476 *
00477          IF( KWTOP.GT.1 )
00478      $      H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
00479          CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
00480          CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
00481      $               LDH+1 )
00482 *
00483 *        ==== Accumulate orthogonal matrix in order to update
00484 *        .    H and Z, if requested.  ====
00485 *
00486          IF( NS.GT.1 .AND. S.NE.ZERO )
00487      $      CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
00488      $                   WORK( JW+1 ), LWORK-JW, INFO )
00489 *
00490 *        ==== Update vertical slab in H ====
00491 *
00492          IF( WANTT ) THEN
00493             LTOP = 1
00494          ELSE
00495             LTOP = KTOP
00496          END IF
00497          DO 70 KROW = LTOP, KWTOP - 1, NV
00498             KLN = MIN( NV, KWTOP-KROW )
00499             CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
00500      $                  LDH, V, LDV, ZERO, WV, LDWV )
00501             CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
00502    70    CONTINUE
00503 *
00504 *        ==== Update horizontal slab in H ====
00505 *
00506          IF( WANTT ) THEN
00507             DO 80 KCOL = KBOT + 1, N, NH
00508                KLN = MIN( NH, N-KCOL+1 )
00509                CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
00510      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
00511                CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
00512      $                      LDH )
00513    80       CONTINUE
00514          END IF
00515 *
00516 *        ==== Update vertical slab in Z ====
00517 *
00518          IF( WANTZ ) THEN
00519             DO 90 KROW = ILOZ, IHIZ, NV
00520                KLN = MIN( NV, IHIZ-KROW+1 )
00521                CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
00522      $                     LDZ, V, LDV, ZERO, WV, LDWV )
00523                CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
00524      $                      LDZ )
00525    90       CONTINUE
00526          END IF
00527       END IF
00528 *
00529 *     ==== Return the number of deflations ... ====
00530 *
00531       ND = JW - NS
00532 *
00533 *     ==== ... and the number of shifts. (Subtracting
00534 *     .    INFQR from the spike length takes care
00535 *     .    of the case of a rare QR failure while
00536 *     .    calculating eigenvalues of the deflation
00537 *     .    window.)  ====
00538 *
00539       NS = NS - INFQR
00540 *
00541 *     ==== Return optimal workspace. ====
00542 *
00543       WORK( 1 ) = DBLE( LWKOPT )
00544 *
00545 *     ==== End of DLAQR2 ====
00546 *
00547       END