LAPACK 3.3.0
|
00001 SUBROUTINE SLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ, 00002 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T, 00003 $ LDT, NV, WV, LDWV, WORK, LWORK ) 00004 * 00005 * -- LAPACK auxiliary routine (version 3.2.1) -- 00006 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 00007 * -- April 2009 -- 00008 * 00009 * .. Scalar Arguments .. 00010 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV, 00011 $ LDZ, LWORK, N, ND, NH, NS, NV, NW 00012 LOGICAL WANTT, WANTZ 00013 * .. 00014 * .. Array Arguments .. 00015 REAL H( LDH, * ), SI( * ), SR( * ), T( LDT, * ), 00016 $ V( LDV, * ), WORK( * ), WV( LDWV, * ), 00017 $ Z( LDZ, * ) 00018 * .. 00019 * 00020 * ****************************************************************** 00021 * Aggressive early deflation: 00022 * 00023 * This subroutine accepts as input an upper Hessenberg matrix 00024 * H and performs an orthogonal similarity transformation 00025 * designed to detect and deflate fully converged eigenvalues from 00026 * a trailing principal submatrix. On output H has been over- 00027 * written by a new Hessenberg matrix that is a perturbation of 00028 * an orthogonal similarity transformation of H. It is to be 00029 * hoped that the final version of H has many zero subdiagonal 00030 * entries. 00031 * 00032 * ****************************************************************** 00033 * WANTT (input) LOGICAL 00034 * If .TRUE., then the Hessenberg matrix H is fully updated 00035 * so that the quasi-triangular Schur factor may be 00036 * computed (in cooperation with the calling subroutine). 00037 * If .FALSE., then only enough of H is updated to preserve 00038 * the eigenvalues. 00039 * 00040 * WANTZ (input) LOGICAL 00041 * If .TRUE., then the orthogonal matrix Z is updated so 00042 * so that the orthogonal Schur factor may be computed 00043 * (in cooperation with the calling subroutine). 00044 * If .FALSE., then Z is not referenced. 00045 * 00046 * N (input) INTEGER 00047 * The order of the matrix H and (if WANTZ is .TRUE.) the 00048 * order of the orthogonal matrix Z. 00049 * 00050 * KTOP (input) INTEGER 00051 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. 00052 * KBOT and KTOP together determine an isolated block 00053 * along the diagonal of the Hessenberg matrix. 00054 * 00055 * KBOT (input) INTEGER 00056 * It is assumed without a check that either 00057 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together 00058 * determine an isolated block along the diagonal of the 00059 * Hessenberg matrix. 00060 * 00061 * NW (input) INTEGER 00062 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). 00063 * 00064 * H (input/output) REAL array, dimension (LDH,N) 00065 * On input the initial N-by-N section of H stores the 00066 * Hessenberg matrix undergoing aggressive early deflation. 00067 * On output H has been transformed by an orthogonal 00068 * similarity transformation, perturbed, and the returned 00069 * to Hessenberg form that (it is to be hoped) has some 00070 * zero subdiagonal entries. 00071 * 00072 * LDH (input) integer 00073 * Leading dimension of H just as declared in the calling 00074 * subroutine. N .LE. LDH 00075 * 00076 * ILOZ (input) INTEGER 00077 * IHIZ (input) INTEGER 00078 * Specify the rows of Z to which transformations must be 00079 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. 00080 * 00081 * Z (input/output) REAL array, dimension (LDZ,N) 00082 * IF WANTZ is .TRUE., then on output, the orthogonal 00083 * similarity transformation mentioned above has been 00084 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. 00085 * If WANTZ is .FALSE., then Z is unreferenced. 00086 * 00087 * LDZ (input) integer 00088 * The leading dimension of Z just as declared in the 00089 * calling subroutine. 1 .LE. LDZ. 00090 * 00091 * NS (output) integer 00092 * The number of unconverged (ie approximate) eigenvalues 00093 * returned in SR and SI that may be used as shifts by the 00094 * calling subroutine. 00095 * 00096 * ND (output) integer 00097 * The number of converged eigenvalues uncovered by this 00098 * subroutine. 00099 * 00100 * SR (output) REAL array, dimension KBOT 00101 * SI (output) REAL array, dimension KBOT 00102 * On output, the real and imaginary parts of approximate 00103 * eigenvalues that may be used for shifts are stored in 00104 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and 00105 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. 00106 * The real and imaginary parts of converged eigenvalues 00107 * are stored in SR(KBOT-ND+1) through SR(KBOT) and 00108 * SI(KBOT-ND+1) through SI(KBOT), respectively. 00109 * 00110 * V (workspace) REAL array, dimension (LDV,NW) 00111 * An NW-by-NW work array. 00112 * 00113 * LDV (input) integer scalar 00114 * The leading dimension of V just as declared in the 00115 * calling subroutine. NW .LE. LDV 00116 * 00117 * NH (input) integer scalar 00118 * The number of columns of T. NH.GE.NW. 00119 * 00120 * T (workspace) REAL array, dimension (LDT,NW) 00121 * 00122 * LDT (input) integer 00123 * The leading dimension of T just as declared in the 00124 * calling subroutine. NW .LE. LDT 00125 * 00126 * NV (input) integer 00127 * The number of rows of work array WV available for 00128 * workspace. NV.GE.NW. 00129 * 00130 * WV (workspace) REAL array, dimension (LDWV,NW) 00131 * 00132 * LDWV (input) integer 00133 * The leading dimension of W just as declared in the 00134 * calling subroutine. NW .LE. LDV 00135 * 00136 * WORK (workspace) REAL array, dimension LWORK. 00137 * On exit, WORK(1) is set to an estimate of the optimal value 00138 * of LWORK for the given values of N, NW, KTOP and KBOT. 00139 * 00140 * LWORK (input) integer 00141 * The dimension of the work array WORK. LWORK = 2*NW 00142 * suffices, but greater efficiency may result from larger 00143 * values of LWORK. 00144 * 00145 * If LWORK = -1, then a workspace query is assumed; SLAQR3 00146 * only estimates the optimal workspace size for the given 00147 * values of N, NW, KTOP and KBOT. The estimate is returned 00148 * in WORK(1). No error message related to LWORK is issued 00149 * by XERBLA. Neither H nor Z are accessed. 00150 * 00151 * ================================================================ 00152 * Based on contributions by 00153 * Karen Braman and Ralph Byers, Department of Mathematics, 00154 * University of Kansas, USA 00155 * 00156 * ================================================================ 00157 * .. Parameters .. 00158 REAL ZERO, ONE 00159 PARAMETER ( ZERO = 0.0e0, ONE = 1.0e0 ) 00160 * .. 00161 * .. Local Scalars .. 00162 REAL AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S, 00163 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP 00164 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL, 00165 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3, 00166 $ LWKOPT, NMIN 00167 LOGICAL BULGE, SORTED 00168 * .. 00169 * .. External Functions .. 00170 REAL SLAMCH 00171 INTEGER ILAENV 00172 EXTERNAL SLAMCH, ILAENV 00173 * .. 00174 * .. External Subroutines .. 00175 EXTERNAL SCOPY, SGEHRD, SGEMM, SLABAD, SLACPY, SLAHQR, 00176 $ SLANV2, SLAQR4, SLARF, SLARFG, SLASET, SORMHR, 00177 $ STREXC 00178 * .. 00179 * .. Intrinsic Functions .. 00180 INTRINSIC ABS, INT, MAX, MIN, REAL, SQRT 00181 * .. 00182 * .. Executable Statements .. 00183 * 00184 * ==== Estimate optimal workspace. ==== 00185 * 00186 JW = MIN( NW, KBOT-KTOP+1 ) 00187 IF( JW.LE.2 ) THEN 00188 LWKOPT = 1 00189 ELSE 00190 * 00191 * ==== Workspace query call to SGEHRD ==== 00192 * 00193 CALL SGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO ) 00194 LWK1 = INT( WORK( 1 ) ) 00195 * 00196 * ==== Workspace query call to SORMHR ==== 00197 * 00198 CALL SORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV, 00199 $ WORK, -1, INFO ) 00200 LWK2 = INT( WORK( 1 ) ) 00201 * 00202 * ==== Workspace query call to SLAQR4 ==== 00203 * 00204 CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW, 00205 $ V, LDV, WORK, -1, INFQR ) 00206 LWK3 = INT( WORK( 1 ) ) 00207 * 00208 * ==== Optimal workspace ==== 00209 * 00210 LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 ) 00211 END IF 00212 * 00213 * ==== Quick return in case of workspace query. ==== 00214 * 00215 IF( LWORK.EQ.-1 ) THEN 00216 WORK( 1 ) = REAL( LWKOPT ) 00217 RETURN 00218 END IF 00219 * 00220 * ==== Nothing to do ... 00221 * ... for an empty active block ... ==== 00222 NS = 0 00223 ND = 0 00224 WORK( 1 ) = ONE 00225 IF( KTOP.GT.KBOT ) 00226 $ RETURN 00227 * ... nor for an empty deflation window. ==== 00228 IF( NW.LT.1 ) 00229 $ RETURN 00230 * 00231 * ==== Machine constants ==== 00232 * 00233 SAFMIN = SLAMCH( 'SAFE MINIMUM' ) 00234 SAFMAX = ONE / SAFMIN 00235 CALL SLABAD( SAFMIN, SAFMAX ) 00236 ULP = SLAMCH( 'PRECISION' ) 00237 SMLNUM = SAFMIN*( REAL( N ) / ULP ) 00238 * 00239 * ==== Setup deflation window ==== 00240 * 00241 JW = MIN( NW, KBOT-KTOP+1 ) 00242 KWTOP = KBOT - JW + 1 00243 IF( KWTOP.EQ.KTOP ) THEN 00244 S = ZERO 00245 ELSE 00246 S = H( KWTOP, KWTOP-1 ) 00247 END IF 00248 * 00249 IF( KBOT.EQ.KWTOP ) THEN 00250 * 00251 * ==== 1-by-1 deflation window: not much to do ==== 00252 * 00253 SR( KWTOP ) = H( KWTOP, KWTOP ) 00254 SI( KWTOP ) = ZERO 00255 NS = 1 00256 ND = 0 00257 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) ) 00258 $ THEN 00259 NS = 0 00260 ND = 1 00261 IF( KWTOP.GT.KTOP ) 00262 $ H( KWTOP, KWTOP-1 ) = ZERO 00263 END IF 00264 WORK( 1 ) = ONE 00265 RETURN 00266 END IF 00267 * 00268 * ==== Convert to spike-triangular form. (In case of a 00269 * . rare QR failure, this routine continues to do 00270 * . aggressive early deflation using that part of 00271 * . the deflation window that converged using INFQR 00272 * . here and there to keep track.) ==== 00273 * 00274 CALL SLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT ) 00275 CALL SCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 ) 00276 * 00277 CALL SLASET( 'A', JW, JW, ZERO, ONE, V, LDV ) 00278 NMIN = ILAENV( 12, 'SLAQR3', 'SV', JW, 1, JW, LWORK ) 00279 IF( JW.GT.NMIN ) THEN 00280 CALL SLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), 00281 $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR ) 00282 ELSE 00283 CALL SLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ), 00284 $ SI( KWTOP ), 1, JW, V, LDV, INFQR ) 00285 END IF 00286 * 00287 * ==== STREXC needs a clean margin near the diagonal ==== 00288 * 00289 DO 10 J = 1, JW - 3 00290 T( J+2, J ) = ZERO 00291 T( J+3, J ) = ZERO 00292 10 CONTINUE 00293 IF( JW.GT.2 ) 00294 $ T( JW, JW-2 ) = ZERO 00295 * 00296 * ==== Deflation detection loop ==== 00297 * 00298 NS = JW 00299 ILST = INFQR + 1 00300 20 CONTINUE 00301 IF( ILST.LE.NS ) THEN 00302 IF( NS.EQ.1 ) THEN 00303 BULGE = .FALSE. 00304 ELSE 00305 BULGE = T( NS, NS-1 ).NE.ZERO 00306 END IF 00307 * 00308 * ==== Small spike tip test for deflation ==== 00309 * 00310 IF( .NOT.BULGE ) THEN 00311 * 00312 * ==== Real eigenvalue ==== 00313 * 00314 FOO = ABS( T( NS, NS ) ) 00315 IF( FOO.EQ.ZERO ) 00316 $ FOO = ABS( S ) 00317 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN 00318 * 00319 * ==== Deflatable ==== 00320 * 00321 NS = NS - 1 00322 ELSE 00323 * 00324 * ==== Undeflatable. Move it up out of the way. 00325 * . (STREXC can not fail in this case.) ==== 00326 * 00327 IFST = NS 00328 CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, 00329 $ INFO ) 00330 ILST = ILST + 1 00331 END IF 00332 ELSE 00333 * 00334 * ==== Complex conjugate pair ==== 00335 * 00336 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )* 00337 $ SQRT( ABS( T( NS-1, NS ) ) ) 00338 IF( FOO.EQ.ZERO ) 00339 $ FOO = ABS( S ) 00340 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE. 00341 $ MAX( SMLNUM, ULP*FOO ) ) THEN 00342 * 00343 * ==== Deflatable ==== 00344 * 00345 NS = NS - 2 00346 ELSE 00347 * 00348 * ==== Undeflatable. Move them up out of the way. 00349 * . Fortunately, STREXC does the right thing with 00350 * . ILST in case of a rare exchange failure. ==== 00351 * 00352 IFST = NS 00353 CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, 00354 $ INFO ) 00355 ILST = ILST + 2 00356 END IF 00357 END IF 00358 * 00359 * ==== End deflation detection loop ==== 00360 * 00361 GO TO 20 00362 END IF 00363 * 00364 * ==== Return to Hessenberg form ==== 00365 * 00366 IF( NS.EQ.0 ) 00367 $ S = ZERO 00368 * 00369 IF( NS.LT.JW ) THEN 00370 * 00371 * ==== sorting diagonal blocks of T improves accuracy for 00372 * . graded matrices. Bubble sort deals well with 00373 * . exchange failures. ==== 00374 * 00375 SORTED = .false. 00376 I = NS + 1 00377 30 CONTINUE 00378 IF( SORTED ) 00379 $ GO TO 50 00380 SORTED = .true. 00381 * 00382 KEND = I - 1 00383 I = INFQR + 1 00384 IF( I.EQ.NS ) THEN 00385 K = I + 1 00386 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN 00387 K = I + 1 00388 ELSE 00389 K = I + 2 00390 END IF 00391 40 CONTINUE 00392 IF( K.LE.KEND ) THEN 00393 IF( K.EQ.I+1 ) THEN 00394 EVI = ABS( T( I, I ) ) 00395 ELSE 00396 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )* 00397 $ SQRT( ABS( T( I, I+1 ) ) ) 00398 END IF 00399 * 00400 IF( K.EQ.KEND ) THEN 00401 EVK = ABS( T( K, K ) ) 00402 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN 00403 EVK = ABS( T( K, K ) ) 00404 ELSE 00405 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )* 00406 $ SQRT( ABS( T( K, K+1 ) ) ) 00407 END IF 00408 * 00409 IF( EVI.GE.EVK ) THEN 00410 I = K 00411 ELSE 00412 SORTED = .false. 00413 IFST = I 00414 ILST = K 00415 CALL STREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK, 00416 $ INFO ) 00417 IF( INFO.EQ.0 ) THEN 00418 I = ILST 00419 ELSE 00420 I = K 00421 END IF 00422 END IF 00423 IF( I.EQ.KEND ) THEN 00424 K = I + 1 00425 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN 00426 K = I + 1 00427 ELSE 00428 K = I + 2 00429 END IF 00430 GO TO 40 00431 END IF 00432 GO TO 30 00433 50 CONTINUE 00434 END IF 00435 * 00436 * ==== Restore shift/eigenvalue array from T ==== 00437 * 00438 I = JW 00439 60 CONTINUE 00440 IF( I.GE.INFQR+1 ) THEN 00441 IF( I.EQ.INFQR+1 ) THEN 00442 SR( KWTOP+I-1 ) = T( I, I ) 00443 SI( KWTOP+I-1 ) = ZERO 00444 I = I - 1 00445 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN 00446 SR( KWTOP+I-1 ) = T( I, I ) 00447 SI( KWTOP+I-1 ) = ZERO 00448 I = I - 1 00449 ELSE 00450 AA = T( I-1, I-1 ) 00451 CC = T( I, I-1 ) 00452 BB = T( I-1, I ) 00453 DD = T( I, I ) 00454 CALL SLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ), 00455 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ), 00456 $ SI( KWTOP+I-1 ), CS, SN ) 00457 I = I - 2 00458 END IF 00459 GO TO 60 00460 END IF 00461 * 00462 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN 00463 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN 00464 * 00465 * ==== Reflect spike back into lower triangle ==== 00466 * 00467 CALL SCOPY( NS, V, LDV, WORK, 1 ) 00468 BETA = WORK( 1 ) 00469 CALL SLARFG( NS, BETA, WORK( 2 ), 1, TAU ) 00470 WORK( 1 ) = ONE 00471 * 00472 CALL SLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT ) 00473 * 00474 CALL SLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT, 00475 $ WORK( JW+1 ) ) 00476 CALL SLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT, 00477 $ WORK( JW+1 ) ) 00478 CALL SLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV, 00479 $ WORK( JW+1 ) ) 00480 * 00481 CALL SGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ), 00482 $ LWORK-JW, INFO ) 00483 END IF 00484 * 00485 * ==== Copy updated reduced window into place ==== 00486 * 00487 IF( KWTOP.GT.1 ) 00488 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 ) 00489 CALL SLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH ) 00490 CALL SCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ), 00491 $ LDH+1 ) 00492 * 00493 * ==== Accumulate orthogonal matrix in order update 00494 * . H and Z, if requested. ==== 00495 * 00496 IF( NS.GT.1 .AND. S.NE.ZERO ) 00497 $ CALL SORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV, 00498 $ WORK( JW+1 ), LWORK-JW, INFO ) 00499 * 00500 * ==== Update vertical slab in H ==== 00501 * 00502 IF( WANTT ) THEN 00503 LTOP = 1 00504 ELSE 00505 LTOP = KTOP 00506 END IF 00507 DO 70 KROW = LTOP, KWTOP - 1, NV 00508 KLN = MIN( NV, KWTOP-KROW ) 00509 CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ), 00510 $ LDH, V, LDV, ZERO, WV, LDWV ) 00511 CALL SLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH ) 00512 70 CONTINUE 00513 * 00514 * ==== Update horizontal slab in H ==== 00515 * 00516 IF( WANTT ) THEN 00517 DO 80 KCOL = KBOT + 1, N, NH 00518 KLN = MIN( NH, N-KCOL+1 ) 00519 CALL SGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV, 00520 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT ) 00521 CALL SLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ), 00522 $ LDH ) 00523 80 CONTINUE 00524 END IF 00525 * 00526 * ==== Update vertical slab in Z ==== 00527 * 00528 IF( WANTZ ) THEN 00529 DO 90 KROW = ILOZ, IHIZ, NV 00530 KLN = MIN( NV, IHIZ-KROW+1 ) 00531 CALL SGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ), 00532 $ LDZ, V, LDV, ZERO, WV, LDWV ) 00533 CALL SLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ), 00534 $ LDZ ) 00535 90 CONTINUE 00536 END IF 00537 END IF 00538 * 00539 * ==== Return the number of deflations ... ==== 00540 * 00541 ND = JW - NS 00542 * 00543 * ==== ... and the number of shifts. (Subtracting 00544 * . INFQR from the spike length takes care 00545 * . of the case of a rare QR failure while 00546 * . calculating eigenvalues of the deflation 00547 * . window.) ==== 00548 * 00549 NS = NS - INFQR 00550 * 00551 * ==== Return optimal workspace. ==== 00552 * 00553 WORK( 1 ) = REAL( LWKOPT ) 00554 * 00555 * ==== End of SLAQR3 ==== 00556 * 00557 END