d2/d09/zlaqr3_8f_source.html

00001       SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
00002      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
00003      $                   NV, WV, LDWV, WORK, LWORK )
00004 *
00005 *  -- LAPACK auxiliary routine (version 3.2.1)                        --
00006 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
00007 *  -- April 2009                                                      --
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
00011      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
00012       LOGICAL            WANTT, WANTZ
00013 *     ..
00014 *     .. Array Arguments ..
00015       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
00016      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
00017 *     ..
00018 *
00019 *     ******************************************************************
00020 *     Aggressive early deflation:
00021 *
00022 *     This subroutine accepts as input an upper Hessenberg matrix
00023 *     H and performs an unitary similarity transformation
00024 *     designed to detect and deflate fully converged eigenvalues from
00025 *     a trailing principal submatrix.  On output H has been over-
00026 *     written by a new Hessenberg matrix that is a perturbation of
00027 *     an unitary similarity transformation of H.  It is to be
00028 *     hoped that the final version of H has many zero subdiagonal
00029 *     entries.
00030 *
00031 *     ******************************************************************
00032 *     WANTT   (input) LOGICAL
00033 *          If .TRUE., then the Hessenberg matrix H is fully updated
00034 *          so that the triangular Schur factor may be
00035 *          computed (in cooperation with the calling subroutine).
00036 *          If .FALSE., then only enough of H is updated to preserve
00037 *          the eigenvalues.
00038 *
00039 *     WANTZ   (input) LOGICAL
00040 *          If .TRUE., then the unitary matrix Z is updated so
00041 *          so that the unitary Schur factor may be computed
00042 *          (in cooperation with the calling subroutine).
00043 *          If .FALSE., then Z is not referenced.
00044 *
00045 *     N       (input) INTEGER
00046 *          The order of the matrix H and (if WANTZ is .TRUE.) the
00047 *          order of the unitary matrix Z.
00048 *
00049 *     KTOP    (input) INTEGER
00050 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
00051 *          KBOT and KTOP together determine an isolated block
00052 *          along the diagonal of the Hessenberg matrix.
00053 *
00054 *     KBOT    (input) INTEGER
00055 *          It is assumed without a check that either
00056 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
00057 *          determine an isolated block along the diagonal of the
00058 *          Hessenberg matrix.
00059 *
00060 *     NW      (input) INTEGER
00061 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
00062 *
00063 *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)
00064 *          On input the initial N-by-N section of H stores the
00065 *          Hessenberg matrix undergoing aggressive early deflation.
00066 *          On output H has been transformed by a unitary
00067 *          similarity transformation, perturbed, and the returned
00068 *          to Hessenberg form that (it is to be hoped) has some
00069 *          zero subdiagonal entries.
00070 *
00071 *     LDH     (input) integer
00072 *          Leading dimension of H just as declared in the calling
00073 *          subroutine.  N .LE. LDH
00074 *
00075 *     ILOZ    (input) INTEGER
00076 *     IHIZ    (input) INTEGER
00077 *          Specify the rows of Z to which transformations must be
00078 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
00079 *
00080 *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
00081 *          IF WANTZ is .TRUE., then on output, the unitary
00082 *          similarity transformation mentioned above has been
00083 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
00084 *          If WANTZ is .FALSE., then Z is unreferenced.
00085 *
00086 *     LDZ     (input) integer
00087 *          The leading dimension of Z just as declared in the
00088 *          calling subroutine.  1 .LE. LDZ.
00089 *
00090 *     NS      (output) integer
00091 *          The number of unconverged (ie approximate) eigenvalues
00092 *          returned in SR and SI that may be used as shifts by the
00093 *          calling subroutine.
00094 *
00095 *     ND      (output) integer
00096 *          The number of converged eigenvalues uncovered by this
00097 *          subroutine.
00098 *
00099 *     SH      (output) COMPLEX*16 array, dimension KBOT
00100 *          On output, approximate eigenvalues that may
00101 *          be used for shifts are stored in SH(KBOT-ND-NS+1)
00102 *          through SR(KBOT-ND).  Converged eigenvalues are
00103 *          stored in SH(KBOT-ND+1) through SH(KBOT).
00104 *
00105 *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)
00106 *          An NW-by-NW work array.
00107 *
00108 *     LDV     (input) integer scalar
00109 *          The leading dimension of V just as declared in the
00110 *          calling subroutine.  NW .LE. LDV
00111 *
00112 *     NH      (input) integer scalar
00113 *          The number of columns of T.  NH.GE.NW.
00114 *
00115 *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)
00116 *
00117 *     LDT     (input) integer
00118 *          The leading dimension of T just as declared in the
00119 *          calling subroutine.  NW .LE. LDT
00120 *
00121 *     NV      (input) integer
00122 *          The number of rows of work array WV available for
00123 *          workspace.  NV.GE.NW.
00124 *
00125 *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)
00126 *
00127 *     LDWV    (input) integer
00128 *          The leading dimension of W just as declared in the
00129 *          calling subroutine.  NW .LE. LDV
00130 *
00131 *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.
00132 *          On exit, WORK(1) is set to an estimate of the optimal value
00133 *          of LWORK for the given values of N, NW, KTOP and KBOT.
00134 *
00135 *     LWORK   (input) integer
00136 *          The dimension of the work array WORK.  LWORK = 2*NW
00137 *          suffices, but greater efficiency may result from larger
00138 *          values of LWORK.
00139 *
00140 *          If LWORK = -1, then a workspace query is assumed; ZLAQR3
00141 *          only estimates the optimal workspace size for the given
00142 *          values of N, NW, KTOP and KBOT.  The estimate is returned
00143 *          in WORK(1).  No error message related to LWORK is issued
00144 *          by XERBLA.  Neither H nor Z are accessed.
00145 *
00146 *     ================================================================
00147 *     Based on contributions by
00148 *        Karen Braman and Ralph Byers, Department of Mathematics,
00149 *        University of Kansas, USA
00150 *
00151 *     ================================================================
00152 *     .. Parameters ..
00153       COMPLEX*16         ZERO, ONE
00154       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),
00155      $                   ONE = ( 1.0d0, 0.0d0 ) )
00156       DOUBLE PRECISION   RZERO, RONE
00157       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
00158 *     ..
00159 *     .. Local Scalars ..
00160       COMPLEX*16         BETA, CDUM, S, TAU
00161       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
00162       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
00163      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
00164      $                   LWKOPT, NMIN
00165 *     ..
00166 *     .. External Functions ..
00167       DOUBLE PRECISION   DLAMCH
00168       INTEGER            ILAENV
00169       EXTERNAL           DLAMCH, ILAENV
00170 *     ..
00171 *     .. External Subroutines ..
00172       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
00173      $                   ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
00174 *     ..
00175 *     .. Intrinsic Functions ..
00176       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN
00177 *     ..
00178 *     .. Statement Functions ..
00179       DOUBLE PRECISION   CABS1
00180 *     ..
00181 *     .. Statement Function definitions ..
00182       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
00183 *     ..
00184 *     .. Executable Statements ..
00185 *
00186 *     ==== Estimate optimal workspace. ====
00187 *
00188       JW = MIN( NW, KBOT-KTOP+1 )
00189       IF( JW.LE.2 ) THEN
00190          LWKOPT = 1
00191       ELSE
00192 *
00193 *        ==== Workspace query call to ZGEHRD ====
00194 *
00195          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
00196          LWK1 = INT( WORK( 1 ) )
00197 *
00198 *        ==== Workspace query call to ZUNMHR ====
00199 *
00200          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
00201      $                WORK, -1, INFO )
00202          LWK2 = INT( WORK( 1 ) )
00203 *
00204 *        ==== Workspace query call to ZLAQR4 ====
00205 *
00206          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V,
00207      $                LDV, WORK, -1, INFQR )
00208          LWK3 = INT( WORK( 1 ) )
00209 *
00210 *        ==== Optimal workspace ====
00211 *
00212          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
00213       END IF
00214 *
00215 *     ==== Quick return in case of workspace query. ====
00216 *
00217       IF( LWORK.EQ.-1 ) THEN
00218          WORK( 1 ) = DCMPLX( LWKOPT, 0 )
00219          RETURN
00220       END IF
00221 *
00222 *     ==== Nothing to do ...
00223 *     ... for an empty active block ... ====
00224       NS = 0
00225       ND = 0
00226       WORK( 1 ) = ONE
00227       IF( KTOP.GT.KBOT )
00228      $   RETURN
00229 *     ... nor for an empty deflation window. ====
00230       IF( NW.LT.1 )
00231      $   RETURN
00232 *
00233 *     ==== Machine constants ====
00234 *
00235       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
00236       SAFMAX = RONE / SAFMIN
00237       CALL DLABAD( SAFMIN, SAFMAX )
00238       ULP = DLAMCH( 'PRECISION' )
00239       SMLNUM = SAFMIN*( DBLE( N ) / ULP )
00240 *
00241 *     ==== Setup deflation window ====
00242 *
00243       JW = MIN( NW, KBOT-KTOP+1 )
00244       KWTOP = KBOT - JW + 1
00245       IF( KWTOP.EQ.KTOP ) THEN
00246          S = ZERO
00247       ELSE
00248          S = H( KWTOP, KWTOP-1 )
00249       END IF
00250 *
00251       IF( KBOT.EQ.KWTOP ) THEN
00252 *
00253 *        ==== 1-by-1 deflation window: not much to do ====
00254 *
00255          SH( KWTOP ) = H( KWTOP, KWTOP )
00256          NS = 1
00257          ND = 0
00258          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
00259      $       KWTOP ) ) ) ) THEN
00260             NS = 0
00261             ND = 1
00262             IF( KWTOP.GT.KTOP )
00263      $         H( KWTOP, KWTOP-1 ) = ZERO
00264          END IF
00265          WORK( 1 ) = ONE
00266          RETURN
00267       END IF
00268 *
00269 *     ==== Convert to spike-triangular form.  (In case of a
00270 *     .    rare QR failure, this routine continues to do
00271 *     .    aggressive early deflation using that part of
00272 *     .    the deflation window that converged using INFQR
00273 *     .    here and there to keep track.) ====
00274 *
00275       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
00276       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
00277 *
00278       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
00279       NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK )
00280       IF( JW.GT.NMIN ) THEN
00281          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
00282      $                JW, V, LDV, WORK, LWORK, INFQR )
00283       ELSE
00284          CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
00285      $                JW, V, LDV, INFQR )
00286       END IF
00287 *
00288 *     ==== Deflation detection loop ====
00289 *
00290       NS = JW
00291       ILST = INFQR + 1
00292       DO 10 KNT = INFQR + 1, JW
00293 *
00294 *        ==== Small spike tip deflation test ====
00295 *
00296          FOO = CABS1( T( NS, NS ) )
00297          IF( FOO.EQ.RZERO )
00298      $      FOO = CABS1( S )
00299          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
00300      $        THEN
00301 *
00302 *           ==== One more converged eigenvalue ====
00303 *
00304             NS = NS - 1
00305          ELSE
00306 *
00307 *           ==== One undeflatable eigenvalue.  Move it up out of the
00308 *           .    way.   (ZTREXC can not fail in this case.) ====
00309 *
00310             IFST = NS
00311             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
00312             ILST = ILST + 1
00313          END IF
00314    10 CONTINUE
00315 *
00316 *        ==== Return to Hessenberg form ====
00317 *
00318       IF( NS.EQ.0 )
00319      $   S = ZERO
00320 *
00321       IF( NS.LT.JW ) THEN
00322 *
00323 *        ==== sorting the diagonal of T improves accuracy for
00324 *        .    graded matrices.  ====
00325 *
00326          DO 30 I = INFQR + 1, NS
00327             IFST = I
00328             DO 20 J = I + 1, NS
00329                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
00330      $            IFST = J
00331    20       CONTINUE
00332             ILST = I
00333             IF( IFST.NE.ILST )
00334      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
00335    30    CONTINUE
00336       END IF
00337 *
00338 *     ==== Restore shift/eigenvalue array from T ====
00339 *
00340       DO 40 I = INFQR + 1, JW
00341          SH( KWTOP+I-1 ) = T( I, I )
00342    40 CONTINUE
00343 *
00344 *
00345       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
00346          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
00347 *
00348 *           ==== Reflect spike back into lower triangle ====
00349 *
00350             CALL ZCOPY( NS, V, LDV, WORK, 1 )
00351             DO 50 I = 1, NS
00352                WORK( I ) = DCONJG( WORK( I ) )
00353    50       CONTINUE
00354             BETA = WORK( 1 )
00355             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
00356             WORK( 1 ) = ONE
00357 *
00358             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
00359 *
00360             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,
00361      $                  WORK( JW+1 ) )
00362             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
00363      $                  WORK( JW+1 ) )
00364             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
00365      $                  WORK( JW+1 ) )
00366 *
00367             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
00368      $                   LWORK-JW, INFO )
00369          END IF
00370 *
00371 *        ==== Copy updated reduced window into place ====
00372 *
00373          IF( KWTOP.GT.1 )
00374      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )
00375          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
00376          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
00377      $               LDH+1 )
00378 *
00379 *        ==== Accumulate orthogonal matrix in order update
00380 *        .    H and Z, if requested.  ====
00381 *
00382          IF( NS.GT.1 .AND. S.NE.ZERO )
00383      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
00384      $                   WORK( JW+1 ), LWORK-JW, INFO )
00385 *
00386 *        ==== Update vertical slab in H ====
00387 *
00388          IF( WANTT ) THEN
00389             LTOP = 1
00390          ELSE
00391             LTOP = KTOP
00392          END IF
00393          DO 60 KROW = LTOP, KWTOP - 1, NV
00394             KLN = MIN( NV, KWTOP-KROW )
00395             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
00396      $                  LDH, V, LDV, ZERO, WV, LDWV )
00397             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
00398    60    CONTINUE
00399 *
00400 *        ==== Update horizontal slab in H ====
00401 *
00402          IF( WANTT ) THEN
00403             DO 70 KCOL = KBOT + 1, N, NH
00404                KLN = MIN( NH, N-KCOL+1 )
00405                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
00406      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
00407                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
00408      $                      LDH )
00409    70       CONTINUE
00410          END IF
00411 *
00412 *        ==== Update vertical slab in Z ====
00413 *
00414          IF( WANTZ ) THEN
00415             DO 80 KROW = ILOZ, IHIZ, NV
00416                KLN = MIN( NV, IHIZ-KROW+1 )
00417                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
00418      $                     LDZ, V, LDV, ZERO, WV, LDWV )
00419                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
00420      $                      LDZ )
00421    80       CONTINUE
00422          END IF
00423       END IF
00424 *
00425 *     ==== Return the number of deflations ... ====
00426 *
00427       ND = JW - NS
00428 *
00429 *     ==== ... and the number of shifts. (Subtracting
00430 *     .    INFQR from the spike length takes care
00431 *     .    of the case of a rare QR failure while
00432 *     .    calculating eigenvalues of the deflation
00433 *     .    window.)  ====
00434 *
00435       NS = NS - INFQR
00436 *
00437 *      ==== Return optimal workspace. ====
00438 *
00439       WORK( 1 ) = DCMPLX( LWKOPT, 0 )
00440 *
00441 *     ==== End of ZLAQR3 ====
00442 *
00443       END