LAPACK 3.3.0

claqr5.f

Go to the documentation of this file.
00001       SUBROUTINE CLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, S,
00002      $                   H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV,
00003      $                   WV, LDWV, NH, WH, LDWH )
00004 *
00005 *  -- LAPACK auxiliary routine (version 3.3.0) --
00006 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
00007 *     November 2010
00008 *
00009 *     .. Scalar Arguments ..
00010       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
00011      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
00012       LOGICAL            WANTT, WANTZ
00013 *     ..
00014 *     .. Array Arguments ..
00015       COMPLEX            H( LDH, * ), S( * ), U( LDU, * ), V( LDV, * ),
00016      $                   WH( LDWH, * ), WV( LDWV, * ), Z( LDZ, * )
00017 *     ..
00018 *
00019 *     This auxiliary subroutine called by CLAQR0 performs a
00020 *     single small-bulge multi-shift QR sweep.
00021 *
00022 *      WANTT  (input) logical scalar
00023 *             WANTT = .true. if the triangular Schur factor
00024 *             is being computed.  WANTT is set to .false. otherwise.
00025 *
00026 *      WANTZ  (input) logical scalar
00027 *             WANTZ = .true. if the unitary Schur factor is being
00028 *             computed.  WANTZ is set to .false. otherwise.
00029 *
00030 *      KACC22 (input) integer with value 0, 1, or 2.
00031 *             Specifies the computation mode of far-from-diagonal
00032 *             orthogonal updates.
00033 *        = 0: CLAQR5 does not accumulate reflections and does not
00034 *             use matrix-matrix multiply to update far-from-diagonal
00035 *             matrix entries.
00036 *        = 1: CLAQR5 accumulates reflections and uses matrix-matrix
00037 *             multiply to update the far-from-diagonal matrix entries.
00038 *        = 2: CLAQR5 accumulates reflections, uses matrix-matrix
00039 *             multiply to update the far-from-diagonal matrix entries,
00040 *             and takes advantage of 2-by-2 block structure during
00041 *             matrix multiplies.
00042 *
00043 *      N      (input) integer scalar
00044 *             N is the order of the Hessenberg matrix H upon which this
00045 *             subroutine operates.
00046 *
00047 *      KTOP   (input) integer scalar
00048 *      KBOT   (input) integer scalar
00049 *             These are the first and last rows and columns of an
00050 *             isolated diagonal block upon which the QR sweep is to be
00051 *             applied. It is assumed without a check that
00052 *                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
00053 *             and
00054 *                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
00055 *
00056 *      NSHFTS (input) integer scalar
00057 *             NSHFTS gives the number of simultaneous shifts.  NSHFTS
00058 *             must be positive and even.
00059 *
00060 *      S      (input/output) COMPLEX array of size (NSHFTS)
00061 *             S contains the shifts of origin that define the multi-
00062 *             shift QR sweep.  On output S may be reordered.
00063 *
00064 *      H      (input/output) COMPLEX array of size (LDH,N)
00065 *             On input H contains a Hessenberg matrix.  On output a
00066 *             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
00067 *             to the isolated diagonal block in rows and columns KTOP
00068 *             through KBOT.
00069 *
00070 *      LDH    (input) integer scalar
00071 *             LDH is the leading dimension of H just as declared in the
00072 *             calling procedure.  LDH.GE.MAX(1,N).
00073 *
00074 *      ILOZ   (input) INTEGER
00075 *      IHIZ   (input) INTEGER
00076 *             Specify the rows of Z to which transformations must be
00077 *             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
00078 *
00079 *      Z      (input/output) COMPLEX array of size (LDZ,IHI)
00080 *             If WANTZ = .TRUE., then the QR Sweep unitary
00081 *             similarity transformation is accumulated into
00082 *             Z(ILOZ:IHIZ,ILO:IHI) from the right.
00083 *             If WANTZ = .FALSE., then Z is unreferenced.
00084 *
00085 *      LDZ    (input) integer scalar
00086 *             LDA is the leading dimension of Z just as declared in
00087 *             the calling procedure. LDZ.GE.N.
00088 *
00089 *      V      (workspace) COMPLEX array of size (LDV,NSHFTS/2)
00090 *
00091 *      LDV    (input) integer scalar
00092 *             LDV is the leading dimension of V as declared in the
00093 *             calling procedure.  LDV.GE.3.
00094 *
00095 *      U      (workspace) COMPLEX array of size
00096 *             (LDU,3*NSHFTS-3)
00097 *
00098 *      LDU    (input) integer scalar
00099 *             LDU is the leading dimension of U just as declared in the
00100 *             in the calling subroutine.  LDU.GE.3*NSHFTS-3.
00101 *
00102 *      NH     (input) integer scalar
00103 *             NH is the number of columns in array WH available for
00104 *             workspace. NH.GE.1.
00105 *
00106 *      WH     (workspace) COMPLEX array of size (LDWH,NH)
00107 *
00108 *      LDWH   (input) integer scalar
00109 *             Leading dimension of WH just as declared in the
00110 *             calling procedure.  LDWH.GE.3*NSHFTS-3.
00111 *
00112 *      NV     (input) integer scalar
00113 *             NV is the number of rows in WV agailable for workspace.
00114 *             NV.GE.1.
00115 *
00116 *      WV     (workspace) COMPLEX array of size
00117 *             (LDWV,3*NSHFTS-3)
00118 *
00119 *      LDWV   (input) integer scalar
00120 *             LDWV is the leading dimension of WV as declared in the
00121 *             in the calling subroutine.  LDWV.GE.NV.
00122 *
00123 *     ================================================================
00124 *     Based on contributions by
00125 *        Karen Braman and Ralph Byers, Department of Mathematics,
00126 *        University of Kansas, USA
00127 *
00128 *     ================================================================
00129 *     Reference:
00130 *
00131 *     K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
00132 *     Algorithm Part I: Maintaining Well Focused Shifts, and
00133 *     Level 3 Performance, SIAM Journal of Matrix Analysis,
00134 *     volume 23, pages 929--947, 2002.
00135 *
00136 *     ================================================================
00137 *     .. Parameters ..
00138       COMPLEX            ZERO, ONE
00139       PARAMETER          ( ZERO = ( 0.0e0, 0.0e0 ),
00140      $                   ONE = ( 1.0e0, 0.0e0 ) )
00141       REAL               RZERO, RONE
00142       PARAMETER          ( RZERO = 0.0e0, RONE = 1.0e0 )
00143 *     ..
00144 *     .. Local Scalars ..
00145       COMPLEX            ALPHA, BETA, CDUM, REFSUM
00146       REAL               H11, H12, H21, H22, SAFMAX, SAFMIN, SCL,
00147      $                   SMLNUM, TST1, TST2, ULP
00148       INTEGER            I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
00149      $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
00150      $                   M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
00151      $                   NS, NU
00152       LOGICAL            ACCUM, BLK22, BMP22
00153 *     ..
00154 *     .. External Functions ..
00155       REAL               SLAMCH
00156       EXTERNAL           SLAMCH
00157 *     ..
00158 *     .. Intrinsic Functions ..
00159 *
00160       INTRINSIC          ABS, AIMAG, CONJG, MAX, MIN, MOD, REAL
00161 *     ..
00162 *     .. Local Arrays ..
00163       COMPLEX            VT( 3 )
00164 *     ..
00165 *     .. External Subroutines ..
00166       EXTERNAL           CGEMM, CLACPY, CLAQR1, CLARFG, CLASET, CTRMM,
00167      $                   SLABAD
00168 *     ..
00169 *     .. Statement Functions ..
00170       REAL               CABS1
00171 *     ..
00172 *     .. Statement Function definitions ..
00173       CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
00174 *     ..
00175 *     .. Executable Statements ..
00176 *
00177 *     ==== If there are no shifts, then there is nothing to do. ====
00178 *
00179       IF( NSHFTS.LT.2 )
00180      $   RETURN
00181 *
00182 *     ==== If the active block is empty or 1-by-1, then there
00183 *     .    is nothing to do. ====
00184 *
00185       IF( KTOP.GE.KBOT )
00186      $   RETURN
00187 *
00188 *     ==== NSHFTS is supposed to be even, but if it is odd,
00189 *     .    then simply reduce it by one.  ====
00190 *
00191       NS = NSHFTS - MOD( NSHFTS, 2 )
00192 *
00193 *     ==== Machine constants for deflation ====
00194 *
00195       SAFMIN = SLAMCH( 'SAFE MINIMUM' )
00196       SAFMAX = RONE / SAFMIN
00197       CALL SLABAD( SAFMIN, SAFMAX )
00198       ULP = SLAMCH( 'PRECISION' )
00199       SMLNUM = SAFMIN*( REAL( N ) / ULP )
00200 *
00201 *     ==== Use accumulated reflections to update far-from-diagonal
00202 *     .    entries ? ====
00203 *
00204       ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
00205 *
00206 *     ==== If so, exploit the 2-by-2 block structure? ====
00207 *
00208       BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
00209 *
00210 *     ==== clear trash ====
00211 *
00212       IF( KTOP+2.LE.KBOT )
00213      $   H( KTOP+2, KTOP ) = ZERO
00214 *
00215 *     ==== NBMPS = number of 2-shift bulges in the chain ====
00216 *
00217       NBMPS = NS / 2
00218 *
00219 *     ==== KDU = width of slab ====
00220 *
00221       KDU = 6*NBMPS - 3
00222 *
00223 *     ==== Create and chase chains of NBMPS bulges ====
00224 *
00225       DO 210 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
00226          NDCOL = INCOL + KDU
00227          IF( ACCUM )
00228      $      CALL CLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
00229 *
00230 *        ==== Near-the-diagonal bulge chase.  The following loop
00231 *        .    performs the near-the-diagonal part of a small bulge
00232 *        .    multi-shift QR sweep.  Each 6*NBMPS-2 column diagonal
00233 *        .    chunk extends from column INCOL to column NDCOL
00234 *        .    (including both column INCOL and column NDCOL). The
00235 *        .    following loop chases a 3*NBMPS column long chain of
00236 *        .    NBMPS bulges 3*NBMPS-2 columns to the right.  (INCOL
00237 *        .    may be less than KTOP and and NDCOL may be greater than
00238 *        .    KBOT indicating phantom columns from which to chase
00239 *        .    bulges before they are actually introduced or to which
00240 *        .    to chase bulges beyond column KBOT.)  ====
00241 *
00242          DO 140 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
00243 *
00244 *           ==== Bulges number MTOP to MBOT are active double implicit
00245 *           .    shift bulges.  There may or may not also be small
00246 *           .    2-by-2 bulge, if there is room.  The inactive bulges
00247 *           .    (if any) must wait until the active bulges have moved
00248 *           .    down the diagonal to make room.  The phantom matrix
00249 *           .    paradigm described above helps keep track.  ====
00250 *
00251             MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
00252             MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
00253             M22 = MBOT + 1
00254             BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
00255      $              ( KBOT-2 )
00256 *
00257 *           ==== Generate reflections to chase the chain right
00258 *           .    one column.  (The minimum value of K is KTOP-1.) ====
00259 *
00260             DO 10 M = MTOP, MBOT
00261                K = KRCOL + 3*( M-1 )
00262                IF( K.EQ.KTOP-1 ) THEN
00263                   CALL CLAQR1( 3, H( KTOP, KTOP ), LDH, S( 2*M-1 ),
00264      $                         S( 2*M ), V( 1, M ) )
00265                   ALPHA = V( 1, M )
00266                   CALL CLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
00267                ELSE
00268                   BETA = H( K+1, K )
00269                   V( 2, M ) = H( K+2, K )
00270                   V( 3, M ) = H( K+3, K )
00271                   CALL CLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
00272 *
00273 *                 ==== A Bulge may collapse because of vigilant
00274 *                 .    deflation or destructive underflow.  In the
00275 *                 .    underflow case, try the two-small-subdiagonals
00276 *                 .    trick to try to reinflate the bulge.  ====
00277 *
00278                   IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
00279      $                ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
00280 *
00281 *                    ==== Typical case: not collapsed (yet). ====
00282 *
00283                      H( K+1, K ) = BETA
00284                      H( K+2, K ) = ZERO
00285                      H( K+3, K ) = ZERO
00286                   ELSE
00287 *
00288 *                    ==== Atypical case: collapsed.  Attempt to
00289 *                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
00290 *                    .    If the fill resulting from the new
00291 *                    .    reflector is too large, then abandon it.
00292 *                    .    Otherwise, use the new one. ====
00293 *
00294                      CALL CLAQR1( 3, H( K+1, K+1 ), LDH, S( 2*M-1 ),
00295      $                            S( 2*M ), VT )
00296                      ALPHA = VT( 1 )
00297                      CALL CLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
00298                      REFSUM = CONJG( VT( 1 ) )*
00299      $                        ( H( K+1, K )+CONJG( VT( 2 ) )*
00300      $                        H( K+2, K ) )
00301 *
00302                      IF( CABS1( H( K+2, K )-REFSUM*VT( 2 ) )+
00303      $                   CABS1( REFSUM*VT( 3 ) ).GT.ULP*
00304      $                   ( CABS1( H( K, K ) )+CABS1( H( K+1,
00305      $                   K+1 ) )+CABS1( H( K+2, K+2 ) ) ) ) THEN
00306 *
00307 *                       ==== Starting a new bulge here would
00308 *                       .    create non-negligible fill.  Use
00309 *                       .    the old one with trepidation. ====
00310 *
00311                         H( K+1, K ) = BETA
00312                         H( K+2, K ) = ZERO
00313                         H( K+3, K ) = ZERO
00314                      ELSE
00315 *
00316 *                       ==== Stating a new bulge here would
00317 *                       .    create only negligible fill.
00318 *                       .    Replace the old reflector with
00319 *                       .    the new one. ====
00320 *
00321                         H( K+1, K ) = H( K+1, K ) - REFSUM
00322                         H( K+2, K ) = ZERO
00323                         H( K+3, K ) = ZERO
00324                         V( 1, M ) = VT( 1 )
00325                         V( 2, M ) = VT( 2 )
00326                         V( 3, M ) = VT( 3 )
00327                      END IF
00328                   END IF
00329                END IF
00330    10       CONTINUE
00331 *
00332 *           ==== Generate a 2-by-2 reflection, if needed. ====
00333 *
00334             K = KRCOL + 3*( M22-1 )
00335             IF( BMP22 ) THEN
00336                IF( K.EQ.KTOP-1 ) THEN
00337                   CALL CLAQR1( 2, H( K+1, K+1 ), LDH, S( 2*M22-1 ),
00338      $                         S( 2*M22 ), V( 1, M22 ) )
00339                   BETA = V( 1, M22 )
00340                   CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
00341                ELSE
00342                   BETA = H( K+1, K )
00343                   V( 2, M22 ) = H( K+2, K )
00344                   CALL CLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
00345                   H( K+1, K ) = BETA
00346                   H( K+2, K ) = ZERO
00347                END IF
00348             END IF
00349 *
00350 *           ==== Multiply H by reflections from the left ====
00351 *
00352             IF( ACCUM ) THEN
00353                JBOT = MIN( NDCOL, KBOT )
00354             ELSE IF( WANTT ) THEN
00355                JBOT = N
00356             ELSE
00357                JBOT = KBOT
00358             END IF
00359             DO 30 J = MAX( KTOP, KRCOL ), JBOT
00360                MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
00361                DO 20 M = MTOP, MEND
00362                   K = KRCOL + 3*( M-1 )
00363                   REFSUM = CONJG( V( 1, M ) )*
00364      $                     ( H( K+1, J )+CONJG( V( 2, M ) )*H( K+2, J )+
00365      $                     CONJG( V( 3, M ) )*H( K+3, J ) )
00366                   H( K+1, J ) = H( K+1, J ) - REFSUM
00367                   H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
00368                   H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
00369    20          CONTINUE
00370    30       CONTINUE
00371             IF( BMP22 ) THEN
00372                K = KRCOL + 3*( M22-1 )
00373                DO 40 J = MAX( K+1, KTOP ), JBOT
00374                   REFSUM = CONJG( V( 1, M22 ) )*
00375      $                     ( H( K+1, J )+CONJG( V( 2, M22 ) )*
00376      $                     H( K+2, J ) )
00377                   H( K+1, J ) = H( K+1, J ) - REFSUM
00378                   H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
00379    40          CONTINUE
00380             END IF
00381 *
00382 *           ==== Multiply H by reflections from the right.
00383 *           .    Delay filling in the last row until the
00384 *           .    vigilant deflation check is complete. ====
00385 *
00386             IF( ACCUM ) THEN
00387                JTOP = MAX( KTOP, INCOL )
00388             ELSE IF( WANTT ) THEN
00389                JTOP = 1
00390             ELSE
00391                JTOP = KTOP
00392             END IF
00393             DO 80 M = MTOP, MBOT
00394                IF( V( 1, M ).NE.ZERO ) THEN
00395                   K = KRCOL + 3*( M-1 )
00396                   DO 50 J = JTOP, MIN( KBOT, K+3 )
00397                      REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
00398      $                        H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
00399                      H( J, K+1 ) = H( J, K+1 ) - REFSUM
00400                      H( J, K+2 ) = H( J, K+2 ) -
00401      $                             REFSUM*CONJG( V( 2, M ) )
00402                      H( J, K+3 ) = H( J, K+3 ) -
00403      $                             REFSUM*CONJG( V( 3, M ) )
00404    50             CONTINUE
00405 *
00406                   IF( ACCUM ) THEN
00407 *
00408 *                    ==== Accumulate U. (If necessary, update Z later
00409 *                    .    with with an efficient matrix-matrix
00410 *                    .    multiply.) ====
00411 *
00412                      KMS = K - INCOL
00413                      DO 60 J = MAX( 1, KTOP-INCOL ), KDU
00414                         REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
00415      $                           U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
00416                         U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
00417                         U( J, KMS+2 ) = U( J, KMS+2 ) -
00418      $                                  REFSUM*CONJG( V( 2, M ) )
00419                         U( J, KMS+3 ) = U( J, KMS+3 ) -
00420      $                                  REFSUM*CONJG( V( 3, M ) )
00421    60                CONTINUE
00422                   ELSE IF( WANTZ ) THEN
00423 *
00424 *                    ==== U is not accumulated, so update Z
00425 *                    .    now by multiplying by reflections
00426 *                    .    from the right. ====
00427 *
00428                      DO 70 J = ILOZ, IHIZ
00429                         REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
00430      $                           Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
00431                         Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
00432                         Z( J, K+2 ) = Z( J, K+2 ) -
00433      $                                REFSUM*CONJG( V( 2, M ) )
00434                         Z( J, K+3 ) = Z( J, K+3 ) -
00435      $                                REFSUM*CONJG( V( 3, M ) )
00436    70                CONTINUE
00437                   END IF
00438                END IF
00439    80       CONTINUE
00440 *
00441 *           ==== Special case: 2-by-2 reflection (if needed) ====
00442 *
00443             K = KRCOL + 3*( M22-1 )
00444             IF( BMP22 ) THEN
00445                IF ( V( 1, M22 ).NE.ZERO ) THEN
00446                   DO 90 J = JTOP, MIN( KBOT, K+3 )
00447                      REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
00448      $                        H( J, K+2 ) )
00449                      H( J, K+1 ) = H( J, K+1 ) - REFSUM
00450                      H( J, K+2 ) = H( J, K+2 ) -
00451      $                             REFSUM*CONJG( V( 2, M22 ) )
00452    90             CONTINUE
00453 *
00454                   IF( ACCUM ) THEN
00455                      KMS = K - INCOL
00456                      DO 100 J = MAX( 1, KTOP-INCOL ), KDU
00457                         REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
00458      $                           V( 2, M22 )*U( J, KMS+2 ) )
00459                         U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
00460                         U( J, KMS+2 ) = U( J, KMS+2 ) -
00461      $                                  REFSUM*CONJG( V( 2, M22 ) )
00462   100                CONTINUE
00463                   ELSE IF( WANTZ ) THEN
00464                      DO 110 J = ILOZ, IHIZ
00465                         REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
00466      $                           Z( J, K+2 ) )
00467                         Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
00468                         Z( J, K+2 ) = Z( J, K+2 ) -
00469      $                                REFSUM*CONJG( V( 2, M22 ) )
00470   110                CONTINUE
00471                   END IF
00472                END IF
00473             END IF
00474 *
00475 *           ==== Vigilant deflation check ====
00476 *
00477             MSTART = MTOP
00478             IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
00479      $         MSTART = MSTART + 1
00480             MEND = MBOT
00481             IF( BMP22 )
00482      $         MEND = MEND + 1
00483             IF( KRCOL.EQ.KBOT-2 )
00484      $         MEND = MEND + 1
00485             DO 120 M = MSTART, MEND
00486                K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
00487 *
00488 *              ==== The following convergence test requires that
00489 *              .    the tradition small-compared-to-nearby-diagonals
00490 *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
00491 *              .    criteria both be satisfied.  The latter improves
00492 *              .    accuracy in some examples. Falling back on an
00493 *              .    alternate convergence criterion when TST1 or TST2
00494 *              .    is zero (as done here) is traditional but probably
00495 *              .    unnecessary. ====
00496 *
00497                IF( H( K+1, K ).NE.ZERO ) THEN
00498                   TST1 = CABS1( H( K, K ) ) + CABS1( H( K+1, K+1 ) )
00499                   IF( TST1.EQ.RZERO ) THEN
00500                      IF( K.GE.KTOP+1 )
00501      $                  TST1 = TST1 + CABS1( H( K, K-1 ) )
00502                      IF( K.GE.KTOP+2 )
00503      $                  TST1 = TST1 + CABS1( H( K, K-2 ) )
00504                      IF( K.GE.KTOP+3 )
00505      $                  TST1 = TST1 + CABS1( H( K, K-3 ) )
00506                      IF( K.LE.KBOT-2 )
00507      $                  TST1 = TST1 + CABS1( H( K+2, K+1 ) )
00508                      IF( K.LE.KBOT-3 )
00509      $                  TST1 = TST1 + CABS1( H( K+3, K+1 ) )
00510                      IF( K.LE.KBOT-4 )
00511      $                  TST1 = TST1 + CABS1( H( K+4, K+1 ) )
00512                   END IF
00513                   IF( CABS1( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
00514      $                 THEN
00515                      H12 = MAX( CABS1( H( K+1, K ) ),
00516      $                     CABS1( H( K, K+1 ) ) )
00517                      H21 = MIN( CABS1( H( K+1, K ) ),
00518      $                     CABS1( H( K, K+1 ) ) )
00519                      H11 = MAX( CABS1( H( K+1, K+1 ) ),
00520      $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
00521                      H22 = MIN( CABS1( H( K+1, K+1 ) ),
00522      $                     CABS1( H( K, K )-H( K+1, K+1 ) ) )
00523                      SCL = H11 + H12
00524                      TST2 = H22*( H11 / SCL )
00525 *
00526                      IF( TST2.EQ.RZERO .OR. H21*( H12 / SCL ).LE.
00527      $                   MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
00528                   END IF
00529                END IF
00530   120       CONTINUE
00531 *
00532 *           ==== Fill in the last row of each bulge. ====
00533 *
00534             MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
00535             DO 130 M = MTOP, MEND
00536                K = KRCOL + 3*( M-1 )
00537                REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
00538                H( K+4, K+1 ) = -REFSUM
00539                H( K+4, K+2 ) = -REFSUM*CONJG( V( 2, M ) )
00540                H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*CONJG( V( 3, M ) )
00541   130       CONTINUE
00542 *
00543 *           ==== End of near-the-diagonal bulge chase. ====
00544 *
00545   140    CONTINUE
00546 *
00547 *        ==== Use U (if accumulated) to update far-from-diagonal
00548 *        .    entries in H.  If required, use U to update Z as
00549 *        .    well. ====
00550 *
00551          IF( ACCUM ) THEN
00552             IF( WANTT ) THEN
00553                JTOP = 1
00554                JBOT = N
00555             ELSE
00556                JTOP = KTOP
00557                JBOT = KBOT
00558             END IF
00559             IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
00560      $          ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
00561 *
00562 *              ==== Updates not exploiting the 2-by-2 block
00563 *              .    structure of U.  K1 and NU keep track of
00564 *              .    the location and size of U in the special
00565 *              .    cases of introducing bulges and chasing
00566 *              .    bulges off the bottom.  In these special
00567 *              .    cases and in case the number of shifts
00568 *              .    is NS = 2, there is no 2-by-2 block
00569 *              .    structure to exploit.  ====
00570 *
00571                K1 = MAX( 1, KTOP-INCOL )
00572                NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
00573 *
00574 *              ==== Horizontal Multiply ====
00575 *
00576                DO 150 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
00577                   JLEN = MIN( NH, JBOT-JCOL+1 )
00578                   CALL CGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
00579      $                        LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
00580      $                        LDWH )
00581                   CALL CLACPY( 'ALL', NU, JLEN, WH, LDWH,
00582      $                         H( INCOL+K1, JCOL ), LDH )
00583   150          CONTINUE
00584 *
00585 *              ==== Vertical multiply ====
00586 *
00587                DO 160 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
00588                   JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
00589                   CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE,
00590      $                        H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
00591      $                        LDU, ZERO, WV, LDWV )
00592                   CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV,
00593      $                         H( JROW, INCOL+K1 ), LDH )
00594   160          CONTINUE
00595 *
00596 *              ==== Z multiply (also vertical) ====
00597 *
00598                IF( WANTZ ) THEN
00599                   DO 170 JROW = ILOZ, IHIZ, NV
00600                      JLEN = MIN( NV, IHIZ-JROW+1 )
00601                      CALL CGEMM( 'N', 'N', JLEN, NU, NU, ONE,
00602      $                           Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
00603      $                           LDU, ZERO, WV, LDWV )
00604                      CALL CLACPY( 'ALL', JLEN, NU, WV, LDWV,
00605      $                            Z( JROW, INCOL+K1 ), LDZ )
00606   170             CONTINUE
00607                END IF
00608             ELSE
00609 *
00610 *              ==== Updates exploiting U's 2-by-2 block structure.
00611 *              .    (I2, I4, J2, J4 are the last rows and columns
00612 *              .    of the blocks.) ====
00613 *
00614                I2 = ( KDU+1 ) / 2
00615                I4 = KDU
00616                J2 = I4 - I2
00617                J4 = KDU
00618 *
00619 *              ==== KZS and KNZ deal with the band of zeros
00620 *              .    along the diagonal of one of the triangular
00621 *              .    blocks. ====
00622 *
00623                KZS = ( J4-J2 ) - ( NS+1 )
00624                KNZ = NS + 1
00625 *
00626 *              ==== Horizontal multiply ====
00627 *
00628                DO 180 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
00629                   JLEN = MIN( NH, JBOT-JCOL+1 )
00630 *
00631 *                 ==== Copy bottom of H to top+KZS of scratch ====
00632 *                  (The first KZS rows get multiplied by zero.) ====
00633 *
00634                   CALL CLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
00635      $                         LDH, WH( KZS+1, 1 ), LDWH )
00636 *
00637 *                 ==== Multiply by U21' ====
00638 *
00639                   CALL CLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
00640                   CALL CTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
00641      $                        U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
00642      $                        LDWH )
00643 *
00644 *                 ==== Multiply top of H by U11' ====
00645 *
00646                   CALL CGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
00647      $                        H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
00648 *
00649 *                 ==== Copy top of H to bottom of WH ====
00650 *
00651                   CALL CLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
00652      $                         WH( I2+1, 1 ), LDWH )
00653 *
00654 *                 ==== Multiply by U21' ====
00655 *
00656                   CALL CTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
00657      $                        U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
00658 *
00659 *                 ==== Multiply by U22 ====
00660 *
00661                   CALL CGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
00662      $                        U( J2+1, I2+1 ), LDU,
00663      $                        H( INCOL+1+J2, JCOL ), LDH, ONE,
00664      $                        WH( I2+1, 1 ), LDWH )
00665 *
00666 *                 ==== Copy it back ====
00667 *
00668                   CALL CLACPY( 'ALL', KDU, JLEN, WH, LDWH,
00669      $                         H( INCOL+1, JCOL ), LDH )
00670   180          CONTINUE
00671 *
00672 *              ==== Vertical multiply ====
00673 *
00674                DO 190 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
00675                   JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
00676 *
00677 *                 ==== Copy right of H to scratch (the first KZS
00678 *                 .    columns get multiplied by zero) ====
00679 *
00680                   CALL CLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
00681      $                         LDH, WV( 1, 1+KZS ), LDWV )
00682 *
00683 *                 ==== Multiply by U21 ====
00684 *
00685                   CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
00686                   CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
00687      $                        U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
00688      $                        LDWV )
00689 *
00690 *                 ==== Multiply by U11 ====
00691 *
00692                   CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE,
00693      $                        H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
00694      $                        LDWV )
00695 *
00696 *                 ==== Copy left of H to right of scratch ====
00697 *
00698                   CALL CLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
00699      $                         WV( 1, 1+I2 ), LDWV )
00700 *
00701 *                 ==== Multiply by U21 ====
00702 *
00703                   CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
00704      $                        U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
00705 *
00706 *                 ==== Multiply by U22 ====
00707 *
00708                   CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
00709      $                        H( JROW, INCOL+1+J2 ), LDH,
00710      $                        U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
00711      $                        LDWV )
00712 *
00713 *                 ==== Copy it back ====
00714 *
00715                   CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV,
00716      $                         H( JROW, INCOL+1 ), LDH )
00717   190          CONTINUE
00718 *
00719 *              ==== Multiply Z (also vertical) ====
00720 *
00721                IF( WANTZ ) THEN
00722                   DO 200 JROW = ILOZ, IHIZ, NV
00723                      JLEN = MIN( NV, IHIZ-JROW+1 )
00724 *
00725 *                    ==== Copy right of Z to left of scratch (first
00726 *                    .     KZS columns get multiplied by zero) ====
00727 *
00728                      CALL CLACPY( 'ALL', JLEN, KNZ,
00729      $                            Z( JROW, INCOL+1+J2 ), LDZ,
00730      $                            WV( 1, 1+KZS ), LDWV )
00731 *
00732 *                    ==== Multiply by U12 ====
00733 *
00734                      CALL CLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
00735      $                            LDWV )
00736                      CALL CTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
00737      $                           U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
00738      $                           LDWV )
00739 *
00740 *                    ==== Multiply by U11 ====
00741 *
00742                      CALL CGEMM( 'N', 'N', JLEN, I2, J2, ONE,
00743      $                           Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
00744      $                           WV, LDWV )
00745 *
00746 *                    ==== Copy left of Z to right of scratch ====
00747 *
00748                      CALL CLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
00749      $                            LDZ, WV( 1, 1+I2 ), LDWV )
00750 *
00751 *                    ==== Multiply by U21 ====
00752 *
00753                      CALL CTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
00754      $                           U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
00755      $                           LDWV )
00756 *
00757 *                    ==== Multiply by U22 ====
00758 *
00759                      CALL CGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
00760      $                           Z( JROW, INCOL+1+J2 ), LDZ,
00761      $                           U( J2+1, I2+1 ), LDU, ONE,
00762      $                           WV( 1, 1+I2 ), LDWV )
00763 *
00764 *                    ==== Copy the result back to Z ====
00765 *
00766                      CALL CLACPY( 'ALL', JLEN, KDU, WV, LDWV,
00767      $                            Z( JROW, INCOL+1 ), LDZ )
00768   200             CONTINUE
00769                END IF
00770             END IF
00771          END IF
00772   210 CONTINUE
00773 *
00774 *     ==== End of CLAQR5 ====
00775 *
00776       END
 All Files Functions