LAPACK 3.3.0
|
00001 SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 00002 $ IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT, 00003 $ NV, WV, LDWV, WORK, LWORK ) 00004 * 00005 * -- LAPACK auxiliary routine (version 3.2.1) -- 00006 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 00007 * -- April 2009 -- 00008 * 00009 * .. Scalar Arguments .. 00010 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, 00011 $ LDZ, LWORK, N, ND, NH, NS, NV, NW 00012 LOGICAL WANTT, WANTZ 00013 * .. 00014 * .. Array Arguments .. 00015 COMPLEX*16 H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ), 00016 $ WORK( * ), WV( LDWV, * ), Z( LDZ, * ) 00017 * .. 00018 * 00019 * ****************************************************************** 00020 * Aggressive early deflation: 00021 * 00022 * This subroutine accepts as input an upper Hessenberg matrix 00023 * H and performs an unitary similarity transformation 00024 * designed to detect and deflate fully converged eigenvalues from 00025 * a trailing principal submatrix. On output H has been over- 00026 * written by a new Hessenberg matrix that is a perturbation of 00027 * an unitary similarity transformation of H. It is to be 00028 * hoped that the final version of H has many zero subdiagonal 00029 * entries. 00030 * 00031 * ****************************************************************** 00032 * WANTT (input) LOGICAL 00033 * If .TRUE., then the Hessenberg matrix H is fully updated 00034 * so that the triangular Schur factor may be 00035 * computed (in cooperation with the calling subroutine). 00036 * If .FALSE., then only enough of H is updated to preserve 00037 * the eigenvalues. 00038 * 00039 * WANTZ (input) LOGICAL 00040 * If .TRUE., then the unitary matrix Z is updated so 00041 * so that the unitary Schur factor may be computed 00042 * (in cooperation with the calling subroutine). 00043 * If .FALSE., then Z is not referenced. 00044 * 00045 * N (input) INTEGER 00046 * The order of the matrix H and (if WANTZ is .TRUE.) the 00047 * order of the unitary matrix Z. 00048 * 00049 * KTOP (input) INTEGER 00050 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. 00051 * KBOT and KTOP together determine an isolated block 00052 * along the diagonal of the Hessenberg matrix. 00053 * 00054 * KBOT (input) INTEGER 00055 * It is assumed without a check that either 00056 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together 00057 * determine an isolated block along the diagonal of the 00058 * Hessenberg matrix. 00059 * 00060 * NW (input) INTEGER 00061 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). 00062 * 00063 * H (input/output) COMPLEX*16 array, dimension (LDH,N) 00064 * On input the initial N-by-N section of H stores the 00065 * Hessenberg matrix undergoing aggressive early deflation. 00066 * On output H has been transformed by a unitary 00067 * similarity transformation, perturbed, and the returned 00068 * to Hessenberg form that (it is to be hoped) has some 00069 * zero subdiagonal entries. 00070 * 00071 * LDH (input) integer 00072 * Leading dimension of H just as declared in the calling 00073 * subroutine. N .LE. LDH 00074 * 00075 * ILOZ (input) INTEGER 00076 * IHIZ (input) INTEGER 00077 * Specify the rows of Z to which transformations must be 00078 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. 00079 * 00080 * Z (input/output) COMPLEX*16 array, dimension (LDZ,N) 00081 * IF WANTZ is .TRUE., then on output, the unitary 00082 * similarity transformation mentioned above has been 00083 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. 00084 * If WANTZ is .FALSE., then Z is unreferenced. 00085 * 00086 * LDZ (input) integer 00087 * The leading dimension of Z just as declared in the 00088 * calling subroutine. 1 .LE. LDZ. 00089 * 00090 * NS (output) integer 00091 * The number of unconverged (ie approximate) eigenvalues 00092 * returned in SR and SI that may be used as shifts by the 00093 * calling subroutine. 00094 * 00095 * ND (output) integer 00096 * The number of converged eigenvalues uncovered by this 00097 * subroutine. 00098 * 00099 * SH (output) COMPLEX*16 array, dimension KBOT 00100 * On output, approximate eigenvalues that may 00101 * be used for shifts are stored in SH(KBOT-ND-NS+1) 00102 * through SR(KBOT-ND). Converged eigenvalues are 00103 * stored in SH(KBOT-ND+1) through SH(KBOT). 00104 * 00105 * V (workspace) COMPLEX*16 array, dimension (LDV,NW) 00106 * An NW-by-NW work array. 00107 * 00108 * LDV (input) integer scalar 00109 * The leading dimension of V just as declared in the 00110 * calling subroutine. NW .LE. LDV 00111 * 00112 * NH (input) integer scalar 00113 * The number of columns of T. NH.GE.NW. 00114 * 00115 * T (workspace) COMPLEX*16 array, dimension (LDT,NW) 00116 * 00117 * LDT (input) integer 00118 * The leading dimension of T just as declared in the 00119 * calling subroutine. NW .LE. LDT 00120 * 00121 * NV (input) integer 00122 * The number of rows of work array WV available for 00123 * workspace. NV.GE.NW. 00124 * 00125 * WV (workspace) COMPLEX*16 array, dimension (LDWV,NW) 00126 * 00127 * LDWV (input) integer 00128 * The leading dimension of W just as declared in the 00129 * calling subroutine. NW .LE. LDV 00130 * 00131 * WORK (workspace) COMPLEX*16 array, dimension LWORK. 00132 * On exit, WORK(1) is set to an estimate of the optimal value 00133 * of LWORK for the given values of N, NW, KTOP and KBOT. 00134 * 00135 * LWORK (input) integer 00136 * The dimension of the work array WORK. LWORK = 2*NW 00137 * suffices, but greater efficiency may result from larger 00138 * values of LWORK. 00139 * 00140 * If LWORK = -1, then a workspace query is assumed; ZLAQR3 00141 * only estimates the optimal workspace size for the given 00142 * values of N, NW, KTOP and KBOT. The estimate is returned 00143 * in WORK(1). No error message related to LWORK is issued 00144 * by XERBLA. Neither H nor Z are accessed. 00145 * 00146 * ================================================================ 00147 * Based on contributions by 00148 * Karen Braman and Ralph Byers, Department of Mathematics, 00149 * University of Kansas, USA 00150 * 00151 * ================================================================ 00152 * .. Parameters .. 00153 COMPLEX*16 ZERO, ONE 00154 PARAMETER ( ZERO = ( 0.0d0, 0.0d0 ), 00155 $ ONE = ( 1.0d0, 0.0d0 ) ) 00156 DOUBLE PRECISION RZERO, RONE 00157 PARAMETER ( RZERO = 0.0d0, RONE = 1.0d0 ) 00158 * .. 00159 * .. Local Scalars .. 00160 COMPLEX*16 BETA, CDUM, S, TAU 00161 DOUBLE PRECISION FOO, SAFMAX, SAFMIN, SMLNUM, ULP 00162 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN, 00163 $ KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, 00164 $ LWKOPT, NMIN 00165 * .. 00166 * .. External Functions .. 00167 DOUBLE PRECISION DLAMCH 00168 INTEGER ILAENV 00169 EXTERNAL DLAMCH, ILAENV 00170 * .. 00171 * .. External Subroutines .. 00172 EXTERNAL DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR, 00173 $ ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR 00174 * .. 00175 * .. Intrinsic Functions .. 00176 INTRINSIC ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN 00177 * .. 00178 * .. Statement Functions .. 00179 DOUBLE PRECISION CABS1 00180 * .. 00181 * .. Statement Function definitions .. 00182 CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) 00183 * .. 00184 * .. Executable Statements .. 00185 * 00186 * ==== Estimate optimal workspace. ==== 00187 * 00188 JW = MIN( NW, KBOT-KTOP+1 ) 00189 IF( JW.LE.2 ) THEN 00190 LWKOPT = 1 00191 ELSE 00192 * 00193 * ==== Workspace query call to ZGEHRD ==== 00194 * 00195 CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) 00196 LWK1 = INT( WORK( 1 ) ) 00197 * 00198 * ==== Workspace query call to ZUNMHR ==== 00199 * 00200 CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, 00201 $ WORK, -1, INFO ) 00202 LWK2 = INT( WORK( 1 ) ) 00203 * 00204 * ==== Workspace query call to ZLAQR4 ==== 00205 * 00206 CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V, 00207 $ LDV, WORK, -1, INFQR ) 00208 LWK3 = INT( WORK( 1 ) ) 00209 * 00210 * ==== Optimal workspace ==== 00211 * 00212 LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) 00213 END IF 00214 * 00215 * ==== Quick return in case of workspace query. ==== 00216 * 00217 IF( LWORK.EQ.-1 ) THEN 00218 WORK( 1 ) = DCMPLX( LWKOPT, 0 ) 00219 RETURN 00220 END IF 00221 * 00222 * ==== Nothing to do ... 00223 * ... for an empty active block ... ==== 00224 NS = 0 00225 ND = 0 00226 WORK( 1 ) = ONE 00227 IF( KTOP.GT.KBOT ) 00228 $ RETURN 00229 * ... nor for an empty deflation window. ==== 00230 IF( NW.LT.1 ) 00231 $ RETURN 00232 * 00233 * ==== Machine constants ==== 00234 * 00235 SAFMIN = DLAMCH( 'SAFE MINIMUM' ) 00236 SAFMAX = RONE / SAFMIN 00237 CALL DLABAD( SAFMIN, SAFMAX ) 00238 ULP = DLAMCH( 'PRECISION' ) 00239 SMLNUM = SAFMIN*( DBLE( N ) / ULP ) 00240 * 00241 * ==== Setup deflation window ==== 00242 * 00243 JW = MIN( NW, KBOT-KTOP+1 ) 00244 KWTOP = KBOT - JW + 1 00245 IF( KWTOP.EQ.KTOP ) THEN 00246 S = ZERO 00247 ELSE 00248 S = H( KWTOP, KWTOP-1 ) 00249 END IF 00250 * 00251 IF( KBOT.EQ.KWTOP ) THEN 00252 * 00253 * ==== 1-by-1 deflation window: not much to do ==== 00254 * 00255 SH( KWTOP ) = H( KWTOP, KWTOP ) 00256 NS = 1 00257 ND = 0 00258 IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP, 00259 $ KWTOP ) ) ) ) THEN 00260 NS = 0 00261 ND = 1 00262 IF( KWTOP.GT.KTOP ) 00263 $ H( KWTOP, KWTOP-1 ) = ZERO 00264 END IF 00265 WORK( 1 ) = ONE 00266 RETURN 00267 END IF 00268 * 00269 * ==== Convert to spike-triangular form. (In case of a 00270 * . rare QR failure, this routine continues to do 00271 * . aggressive early deflation using that part of 00272 * . the deflation window that converged using INFQR 00273 * . here and there to keep track.) ==== 00274 * 00275 CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) 00276 CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) 00277 * 00278 CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) 00279 NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK ) 00280 IF( JW.GT.NMIN ) THEN 00281 CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, 00282 $ JW, V, LDV, WORK, LWORK, INFQR ) 00283 ELSE 00284 CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1, 00285 $ JW, V, LDV, INFQR ) 00286 END IF 00287 * 00288 * ==== Deflation detection loop ==== 00289 * 00290 NS = JW 00291 ILST = INFQR + 1 00292 DO 10 KNT = INFQR + 1, JW 00293 * 00294 * ==== Small spike tip deflation test ==== 00295 * 00296 FOO = CABS1( T( NS, NS ) ) 00297 IF( FOO.EQ.RZERO ) 00298 $ FOO = CABS1( S ) 00299 IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) 00300 $ THEN 00301 * 00302 * ==== One more converged eigenvalue ==== 00303 * 00304 NS = NS - 1 00305 ELSE 00306 * 00307 * ==== One undeflatable eigenvalue. Move it up out of the 00308 * . way. (ZTREXC can not fail in this case.) ==== 00309 * 00310 IFST = NS 00311 CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) 00312 ILST = ILST + 1 00313 END IF 00314 10 CONTINUE 00315 * 00316 * ==== Return to Hessenberg form ==== 00317 * 00318 IF( NS.EQ.0 ) 00319 $ S = ZERO 00320 * 00321 IF( NS.LT.JW ) THEN 00322 * 00323 * ==== sorting the diagonal of T improves accuracy for 00324 * . graded matrices. ==== 00325 * 00326 DO 30 I = INFQR + 1, NS 00327 IFST = I 00328 DO 20 J = I + 1, NS 00329 IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) ) 00330 $ IFST = J 00331 20 CONTINUE 00332 ILST = I 00333 IF( IFST.NE.ILST ) 00334 $ CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO ) 00335 30 CONTINUE 00336 END IF 00337 * 00338 * ==== Restore shift/eigenvalue array from T ==== 00339 * 00340 DO 40 I = INFQR + 1, JW 00341 SH( KWTOP+I-1 ) = T( I, I ) 00342 40 CONTINUE 00343 * 00344 * 00345 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN 00346 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN 00347 * 00348 * ==== Reflect spike back into lower triangle ==== 00349 * 00350 CALL ZCOPY( NS, V, LDV, WORK, 1 ) 00351 DO 50 I = 1, NS 00352 WORK( I ) = DCONJG( WORK( I ) ) 00353 50 CONTINUE 00354 BETA = WORK( 1 ) 00355 CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU ) 00356 WORK( 1 ) = ONE 00357 * 00358 CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) 00359 * 00360 CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT, 00361 $ WORK( JW+1 ) ) 00362 CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, 00363 $ WORK( JW+1 ) ) 00364 CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, 00365 $ WORK( JW+1 ) ) 00366 * 00367 CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), 00368 $ LWORK-JW, INFO ) 00369 END IF 00370 * 00371 * ==== Copy updated reduced window into place ==== 00372 * 00373 IF( KWTOP.GT.1 ) 00374 $ H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) ) 00375 CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) 00376 CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), 00377 $ LDH+1 ) 00378 * 00379 * ==== Accumulate orthogonal matrix in order update 00380 * . H and Z, if requested. ==== 00381 * 00382 IF( NS.GT.1 .AND. S.NE.ZERO ) 00383 $ CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, 00384 $ WORK( JW+1 ), LWORK-JW, INFO ) 00385 * 00386 * ==== Update vertical slab in H ==== 00387 * 00388 IF( WANTT ) THEN 00389 LTOP = 1 00390 ELSE 00391 LTOP = KTOP 00392 END IF 00393 DO 60 KROW = LTOP, KWTOP - 1, NV 00394 KLN = MIN( NV, KWTOP-KROW ) 00395 CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), 00396 $ LDH, V, LDV, ZERO, WV, LDWV ) 00397 CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) 00398 60 CONTINUE 00399 * 00400 * ==== Update horizontal slab in H ==== 00401 * 00402 IF( WANTT ) THEN 00403 DO 70 KCOL = KBOT + 1, N, NH 00404 KLN = MIN( NH, N-KCOL+1 ) 00405 CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, 00406 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) 00407 CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), 00408 $ LDH ) 00409 70 CONTINUE 00410 END IF 00411 * 00412 * ==== Update vertical slab in Z ==== 00413 * 00414 IF( WANTZ ) THEN 00415 DO 80 KROW = ILOZ, IHIZ, NV 00416 KLN = MIN( NV, IHIZ-KROW+1 ) 00417 CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), 00418 $ LDZ, V, LDV, ZERO, WV, LDWV ) 00419 CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), 00420 $ LDZ ) 00421 80 CONTINUE 00422 END IF 00423 END IF 00424 * 00425 * ==== Return the number of deflations ... ==== 00426 * 00427 ND = JW - NS 00428 * 00429 * ==== ... and the number of shifts. (Subtracting 00430 * . INFQR from the spike length takes care 00431 * . of the case of a rare QR failure while 00432 * . calculating eigenvalues of the deflation 00433 * . window.) ==== 00434 * 00435 NS = NS - INFQR 00436 * 00437 * ==== Return optimal workspace. ==== 00438 * 00439 WORK( 1 ) = DCMPLX( LWKOPT, 0 ) 00440 * 00441 * ==== End of ZLAQR3 ==== 00442 * 00443 END