LAPACK 3.3.0
|
00001 SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 00002 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, 00003 $ LDT, NV, WV, LDWV, WORK, LWORK ) 00004 * 00005 * -- LAPACK auxiliary routine (version 3.2.2) -- 00006 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 00007 * -- June 2010 -- 00008 * 00009 * .. Scalar Arguments .. 00010 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, 00011 $ LDZ, LWORK, N, ND, NH, NS, NV, NW 00012 LOGICAL WANTT, WANTZ 00013 * .. 00014 * .. Array Arguments .. 00015 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), 00016 $ V( LDV, * ), WORK( * ), WV( LDWV, * ), 00017 $ Z( LDZ, * ) 00018 * .. 00019 * 00020 * This subroutine is identical to DLAQR3 except that it avoids 00021 * recursion by calling DLAHQR instead of DLAQR4. 00022 * 00023 * 00024 * ****************************************************************** 00025 * Aggressive early deflation: 00026 * 00027 * This subroutine accepts as input an upper Hessenberg matrix 00028 * H and performs an orthogonal similarity transformation 00029 * designed to detect and deflate fully converged eigenvalues from 00030 * a trailing principal submatrix. On output H has been over- 00031 * written by a new Hessenberg matrix that is a perturbation of 00032 * an orthogonal similarity transformation of H. It is to be 00033 * hoped that the final version of H has many zero subdiagonal 00034 * entries. 00035 * 00036 * ****************************************************************** 00037 * WANTT (input) LOGICAL 00038 * If .TRUE., then the Hessenberg matrix H is fully updated 00039 * so that the quasi-triangular Schur factor may be 00040 * computed (in cooperation with the calling subroutine). 00041 * If .FALSE., then only enough of H is updated to preserve 00042 * the eigenvalues. 00043 * 00044 * WANTZ (input) LOGICAL 00045 * If .TRUE., then the orthogonal matrix Z is updated so 00046 * so that the orthogonal Schur factor may be computed 00047 * (in cooperation with the calling subroutine). 00048 * If .FALSE., then Z is not referenced. 00049 * 00050 * N (input) INTEGER 00051 * The order of the matrix H and (if WANTZ is .TRUE.) the 00052 * order of the orthogonal matrix Z. 00053 * 00054 * KTOP (input) INTEGER 00055 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. 00056 * KBOT and KTOP together determine an isolated block 00057 * along the diagonal of the Hessenberg matrix. 00058 * 00059 * KBOT (input) INTEGER 00060 * It is assumed without a check that either 00061 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together 00062 * determine an isolated block along the diagonal of the 00063 * Hessenberg matrix. 00064 * 00065 * NW (input) INTEGER 00066 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). 00067 * 00068 * H (input/output) DOUBLE PRECISION array, dimension (LDH,N) 00069 * On input the initial N-by-N section of H stores the 00070 * Hessenberg matrix undergoing aggressive early deflation. 00071 * On output H has been transformed by an orthogonal 00072 * similarity transformation, perturbed, and the returned 00073 * to Hessenberg form that (it is to be hoped) has some 00074 * zero subdiagonal entries. 00075 * 00076 * LDH (input) integer 00077 * Leading dimension of H just as declared in the calling 00078 * subroutine. N .LE. LDH 00079 * 00080 * ILOZ (input) INTEGER 00081 * IHIZ (input) INTEGER 00082 * Specify the rows of Z to which transformations must be 00083 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. 00084 * 00085 * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) 00086 * IF WANTZ is .TRUE., then on output, the orthogonal 00087 * similarity transformation mentioned above has been 00088 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. 00089 * If WANTZ is .FALSE., then Z is unreferenced. 00090 * 00091 * LDZ (input) integer 00092 * The leading dimension of Z just as declared in the 00093 * calling subroutine. 1 .LE. LDZ. 00094 * 00095 * NS (output) integer 00096 * The number of unconverged (ie approximate) eigenvalues 00097 * returned in SR and SI that may be used as shifts by the 00098 * calling subroutine. 00099 * 00100 * ND (output) integer 00101 * The number of converged eigenvalues uncovered by this 00102 * subroutine. 00103 * 00104 * SR (output) DOUBLE PRECISION array, dimension (KBOT) 00105 * SI (output) DOUBLE PRECISION array, dimension (KBOT) 00106 * On output, the real and imaginary parts of approximate 00107 * eigenvalues that may be used for shifts are stored in 00108 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and 00109 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. 00110 * The real and imaginary parts of converged eigenvalues 00111 * are stored in SR(KBOT-ND+1) through SR(KBOT) and 00112 * SI(KBOT-ND+1) through SI(KBOT), respectively. 00113 * 00114 * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) 00115 * An NW-by-NW work array. 00116 * 00117 * LDV (input) integer scalar 00118 * The leading dimension of V just as declared in the 00119 * calling subroutine. NW .LE. LDV 00120 * 00121 * NH (input) integer scalar 00122 * The number of columns of T. NH.GE.NW. 00123 * 00124 * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) 00125 * 00126 * LDT (input) integer 00127 * The leading dimension of T just as declared in the 00128 * calling subroutine. NW .LE. LDT 00129 * 00130 * NV (input) integer 00131 * The number of rows of work array WV available for 00132 * workspace. NV.GE.NW. 00133 * 00134 * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) 00135 * 00136 * LDWV (input) integer 00137 * The leading dimension of W just as declared in the 00138 * calling subroutine. NW .LE. LDV 00139 * 00140 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) 00141 * On exit, WORK(1) is set to an estimate of the optimal value 00142 * of LWORK for the given values of N, NW, KTOP and KBOT. 00143 * 00144 * LWORK (input) integer 00145 * The dimension of the work array WORK. LWORK = 2*NW 00146 * suffices, but greater efficiency may result from larger 00147 * values of LWORK. 00148 * 00149 * If LWORK = -1, then a workspace query is assumed; DLAQR2 00150 * only estimates the optimal workspace size for the given 00151 * values of N, NW, KTOP and KBOT. The estimate is returned 00152 * in WORK(1). No error message related to LWORK is issued 00153 * by XERBLA. Neither H nor Z are accessed. 00154 * 00155 * ================================================================ 00156 * Based on contributions by 00157 * Karen Braman and Ralph Byers, Department of Mathematics, 00158 * University of Kansas, USA 00159 * 00160 * ================================================================ 00161 * .. Parameters .. 00162 DOUBLE PRECISION ZERO, ONE 00163 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 ) 00164 * .. 00165 * .. Local Scalars .. 00166 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, 00167 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP 00168 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, 00169 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, 00170 $ LWKOPT 00171 LOGICAL BULGE, SORTED 00172 * .. 00173 * .. External Functions .. 00174 DOUBLE PRECISION DLAMCH 00175 EXTERNAL DLAMCH 00176 * .. 00177 * .. External Subroutines .. 00178 EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR, 00179 $ DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC 00180 * .. 00181 * .. Intrinsic Functions .. 00182 INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT 00183 * .. 00184 * .. Executable Statements .. 00185 * 00186 * ==== Estimate optimal workspace. ==== 00187 * 00188 JW = MIN( NW, KBOT-KTOP+1 ) 00189 IF( JW.LE.2 ) THEN 00190 LWKOPT = 1 00191 ELSE 00192 * 00193 * ==== Workspace query call to DGEHRD ==== 00194 * 00195 CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) 00196 LWK1 = INT( WORK( 1 ) ) 00197 * 00198 * ==== Workspace query call to DORMHR ==== 00199 * 00200 CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, 00201 $ WORK, -1, INFO ) 00202 LWK2 = INT( WORK( 1 ) ) 00203 * 00204 * ==== Optimal workspace ==== 00205 * 00206 LWKOPT = JW + MAX( LWK1, LWK2 ) 00207 END IF 00208 * 00209 * ==== Quick return in case of workspace query. ==== 00210 * 00211 IF( LWORK.EQ.-1 ) THEN 00212 WORK( 1 ) = DBLE( LWKOPT ) 00213 RETURN 00214 END IF 00215 * 00216 * ==== Nothing to do ... 00217 * ... for an empty active block ... ==== 00218 NS = 0 00219 ND = 0 00220 WORK( 1 ) = ONE 00221 IF( KTOP.GT.KBOT ) 00222 $ RETURN 00223 * ... nor for an empty deflation window. ==== 00224 IF( NW.LT.1 ) 00225 $ RETURN 00226 * 00227 * ==== Machine constants ==== 00228 * 00229 SAFMIN = DLAMCH( 'SAFE MINIMUM' ) 00230 SAFMAX = ONE / SAFMIN 00231 CALL DLABAD( SAFMIN, SAFMAX ) 00232 ULP = DLAMCH( 'PRECISION' ) 00233 SMLNUM = SAFMIN*( DBLE( N ) / ULP ) 00234 * 00235 * ==== Setup deflation window ==== 00236 * 00237 JW = MIN( NW, KBOT-KTOP+1 ) 00238 KWTOP = KBOT - JW + 1 00239 IF( KWTOP.EQ.KTOP ) THEN 00240 S = ZERO 00241 ELSE 00242 S = H( KWTOP, KWTOP-1 ) 00243 END IF 00244 * 00245 IF( KBOT.EQ.KWTOP ) THEN 00246 * 00247 * ==== 1-by-1 deflation window: not much to do ==== 00248 * 00249 SR( KWTOP ) = H( KWTOP, KWTOP ) 00250 SI( KWTOP ) = ZERO 00251 NS = 1 00252 ND = 0 00253 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) 00254 $ THEN 00255 NS = 0 00256 ND = 1 00257 IF( KWTOP.GT.KTOP ) 00258 $ H( KWTOP, KWTOP-1 ) = ZERO 00259 END IF 00260 WORK( 1 ) = ONE 00261 RETURN 00262 END IF 00263 * 00264 * ==== Convert to spike-triangular form. (In case of a 00265 * . rare QR failure, this routine continues to do 00266 * . aggressive early deflation using that part of 00267 * . the deflation window that converged using INFQR 00268 * . here and there to keep track.) ==== 00269 * 00270 CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) 00271 CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) 00272 * 00273 CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) 00274 CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), 00275 $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) 00276 * 00277 * ==== DTREXC needs a clean margin near the diagonal ==== 00278 * 00279 DO 10 J = 1, JW - 3 00280 T( J+2, J ) = ZERO 00281 T( J+3, J ) = ZERO 00282 10 CONTINUE 00283 IF( JW.GT.2 ) 00284 $ T( JW, JW-2 ) = ZERO 00285 * 00286 * ==== Deflation detection loop ==== 00287 * 00288 NS = JW 00289 ILST = INFQR + 1 00290 20 CONTINUE 00291 IF( ILST.LE.NS ) THEN 00292 IF( NS.EQ.1 ) THEN 00293 BULGE = .FALSE. 00294 ELSE 00295 BULGE = T( NS, NS-1 ).NE.ZERO 00296 END IF 00297 * 00298 * ==== Small spike tip test for deflation ==== 00299 * 00300 IF( .NOT.BULGE ) THEN 00301 * 00302 * ==== Real eigenvalue ==== 00303 * 00304 FOO = ABS( T( NS, NS ) ) 00305 IF( FOO.EQ.ZERO ) 00306 $ FOO = ABS( S ) 00307 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN 00308 * 00309 * ==== Deflatable ==== 00310 * 00311 NS = NS - 1 00312 ELSE 00313 * 00314 * ==== Undeflatable. Move it up out of the way. 00315 * . (DTREXC can not fail in this case.) ==== 00316 * 00317 IFST = NS 00318 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, 00319 $ INFO ) 00320 ILST = ILST + 1 00321 END IF 00322 ELSE 00323 * 00324 * ==== Complex conjugate pair ==== 00325 * 00326 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* 00327 $ SQRT( ABS( T( NS-1, NS ) ) ) 00328 IF( FOO.EQ.ZERO ) 00329 $ FOO = ABS( S ) 00330 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. 00331 $ MAX( SMLNUM, ULP*FOO ) ) THEN 00332 * 00333 * ==== Deflatable ==== 00334 * 00335 NS = NS - 2 00336 ELSE 00337 * 00338 * ==== Undeflatable. Move them up out of the way. 00339 * . Fortunately, DTREXC does the right thing with 00340 * . ILST in case of a rare exchange failure. ==== 00341 * 00342 IFST = NS 00343 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, 00344 $ INFO ) 00345 ILST = ILST + 2 00346 END IF 00347 END IF 00348 * 00349 * ==== End deflation detection loop ==== 00350 * 00351 GO TO 20 00352 END IF 00353 * 00354 * ==== Return to Hessenberg form ==== 00355 * 00356 IF( NS.EQ.0 ) 00357 $ S = ZERO 00358 * 00359 IF( NS.LT.JW ) THEN 00360 * 00361 * ==== sorting diagonal blocks of T improves accuracy for 00362 * . graded matrices. Bubble sort deals well with 00363 * . exchange failures. ==== 00364 * 00365 SORTED = .false. 00366 I = NS + 1 00367 30 CONTINUE 00368 IF( SORTED ) 00369 $ GO TO 50 00370 SORTED = .true. 00371 * 00372 KEND = I - 1 00373 I = INFQR + 1 00374 IF( I.EQ.NS ) THEN 00375 K = I + 1 00376 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN 00377 K = I + 1 00378 ELSE 00379 K = I + 2 00380 END IF 00381 40 CONTINUE 00382 IF( K.LE.KEND ) THEN 00383 IF( K.EQ.I+1 ) THEN 00384 EVI = ABS( T( I, I ) ) 00385 ELSE 00386 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* 00387 $ SQRT( ABS( T( I, I+1 ) ) ) 00388 END IF 00389 * 00390 IF( K.EQ.KEND ) THEN 00391 EVK = ABS( T( K, K ) ) 00392 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN 00393 EVK = ABS( T( K, K ) ) 00394 ELSE 00395 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* 00396 $ SQRT( ABS( T( K, K+1 ) ) ) 00397 END IF 00398 * 00399 IF( EVI.GE.EVK ) THEN 00400 I = K 00401 ELSE 00402 SORTED = .false. 00403 IFST = I 00404 ILST = K 00405 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, 00406 $ INFO ) 00407 IF( INFO.EQ.0 ) THEN 00408 I = ILST 00409 ELSE 00410 I = K 00411 END IF 00412 END IF 00413 IF( I.EQ.KEND ) THEN 00414 K = I + 1 00415 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN 00416 K = I + 1 00417 ELSE 00418 K = I + 2 00419 END IF 00420 GO TO 40 00421 END IF 00422 GO TO 30 00423 50 CONTINUE 00424 END IF 00425 * 00426 * ==== Restore shift/eigenvalue array from T ==== 00427 * 00428 I = JW 00429 60 CONTINUE 00430 IF( I.GE.INFQR+1 ) THEN 00431 IF( I.EQ.INFQR+1 ) THEN 00432 SR( KWTOP+I-1 ) = T( I, I ) 00433 SI( KWTOP+I-1 ) = ZERO 00434 I = I - 1 00435 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN 00436 SR( KWTOP+I-1 ) = T( I, I ) 00437 SI( KWTOP+I-1 ) = ZERO 00438 I = I - 1 00439 ELSE 00440 AA = T( I-1, I-1 ) 00441 CC = T( I, I-1 ) 00442 BB = T( I-1, I ) 00443 DD = T( I, I ) 00444 CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), 00445 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), 00446 $ SI( KWTOP+I-1 ), CS, SN ) 00447 I = I - 2 00448 END IF 00449 GO TO 60 00450 END IF 00451 * 00452 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN 00453 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN 00454 * 00455 * ==== Reflect spike back into lower triangle ==== 00456 * 00457 CALL DCOPY( NS, V, LDV, WORK, 1 ) 00458 BETA = WORK( 1 ) 00459 CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU ) 00460 WORK( 1 ) = ONE 00461 * 00462 CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) 00463 * 00464 CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, 00465 $ WORK( JW+1 ) ) 00466 CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, 00467 $ WORK( JW+1 ) ) 00468 CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, 00469 $ WORK( JW+1 ) ) 00470 * 00471 CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), 00472 $ LWORK-JW, INFO ) 00473 END IF 00474 * 00475 * ==== Copy updated reduced window into place ==== 00476 * 00477 IF( KWTOP.GT.1 ) 00478 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) 00479 CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) 00480 CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), 00481 $ LDH+1 ) 00482 * 00483 * ==== Accumulate orthogonal matrix in order update 00484 * . H and Z, if requested. ==== 00485 * 00486 IF( NS.GT.1 .AND. S.NE.ZERO ) 00487 $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, 00488 $ WORK( JW+1 ), LWORK-JW, INFO ) 00489 * 00490 * ==== Update vertical slab in H ==== 00491 * 00492 IF( WANTT ) THEN 00493 LTOP = 1 00494 ELSE 00495 LTOP = KTOP 00496 END IF 00497 DO 70 KROW = LTOP, KWTOP - 1, NV 00498 KLN = MIN( NV, KWTOP-KROW ) 00499 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), 00500 $ LDH, V, LDV, ZERO, WV, LDWV ) 00501 CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) 00502 70 CONTINUE 00503 * 00504 * ==== Update horizontal slab in H ==== 00505 * 00506 IF( WANTT ) THEN 00507 DO 80 KCOL = KBOT + 1, N, NH 00508 KLN = MIN( NH, N-KCOL+1 ) 00509 CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, 00510 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) 00511 CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), 00512 $ LDH ) 00513 80 CONTINUE 00514 END IF 00515 * 00516 * ==== Update vertical slab in Z ==== 00517 * 00518 IF( WANTZ ) THEN 00519 DO 90 KROW = ILOZ, IHIZ, NV 00520 KLN = MIN( NV, IHIZ-KROW+1 ) 00521 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), 00522 $ LDZ, V, LDV, ZERO, WV, LDWV ) 00523 CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), 00524 $ LDZ ) 00525 90 CONTINUE 00526 END IF 00527 END IF 00528 * 00529 * ==== Return the number of deflations ... ==== 00530 * 00531 ND = JW - NS 00532 * 00533 * ==== ... and the number of shifts. (Subtracting 00534 * . INFQR from the spike length takes care 00535 * . of the case of a rare QR failure while 00536 * . calculating eigenvalues of the deflation 00537 * . window.) ==== 00538 * 00539 NS = NS - INFQR 00540 * 00541 * ==== Return optimal workspace. ==== 00542 * 00543 WORK( 1 ) = DBLE( LWKOPT ) 00544 * 00545 * ==== End of DLAQR2 ==== 00546 * 00547 END