d8/d97/slaqr5_8f_source.html

 *> \brief \b SLAQR5 performs a single small-bulge multi-shift QR sweep.

 *

 *  =========== DOCUMENTATION ===========

 *

 * Online html documentation available at

 *            http://www.netlib.org/lapack/explore-html/

 *

 *> \htmlonly

 *> Download SLAQR5 + dependencies

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slaqr5.f">

 *> [TGZ]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slaqr5.f">

 *> [ZIP]</a>

 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slaqr5.f">

 *> [TXT]</a>

 *> \endhtmlonly

 *

 *  Definition:

 *  ===========

 *

 *       SUBROUTINE SLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,

 *                          SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,

 *                          LDU, NV, WV, LDWV, NH, WH, LDWH )

 *

 *       .. Scalar Arguments ..

 *       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,

 *      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV

 *       LOGICAL            WANTT, WANTZ

 *       ..

 *       .. Array Arguments ..

 *       REAL               H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),

 *      $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),

 *      $                   Z( LDZ, * )

 *       ..

 *

 *

 *> \par Purpose:

 *  =============

 *>

 *> \verbatim

 *>

 *>    SLAQR5, called by SLAQR0, performs a

 *>    single small-bulge multi-shift QR sweep.

 *> \endverbatim

 *

 *  Arguments:

 *  ==========

 *

 *> \param[in] WANTT

 *> \verbatim

 *>          WANTT is logical scalar

 *>             WANTT = .true. if the quasi-triangular Schur factor

 *>             is being computed.  WANTT is set to .false. otherwise.

 *> \endverbatim

 *>

 *> \param[in] WANTZ

 *> \verbatim

 *>          WANTZ is logical scalar

 *>             WANTZ = .true. if the orthogonal Schur factor is being

 *>             computed.  WANTZ is set to .false. otherwise.

 *> \endverbatim

 *>

 *> \param[in] KACC22

 *> \verbatim

 *>          KACC22 is integer with value 0, 1, or 2.

 *>             Specifies the computation mode of far-from-diagonal

 *>             orthogonal updates.

 *>        = 0: SLAQR5 does not accumulate reflections and does not

 *>             use matrix-matrix multiply to update far-from-diagonal

 *>             matrix entries.

 *>        = 1: SLAQR5 accumulates reflections and uses matrix-matrix

 *>             multiply to update the far-from-diagonal matrix entries.

 *>        = 2: SLAQR5 accumulates reflections, uses matrix-matrix

 *>             multiply to update the far-from-diagonal matrix entries,

 *>             and takes advantage of 2-by-2 block structure during

 *>             matrix multiplies.

 *> \endverbatim

 *>

 *> \param[in] N

 *> \verbatim

 *>          N is integer scalar

 *>             N is the order of the Hessenberg matrix H upon which this

 *>             subroutine operates.

 *> \endverbatim

 *>

 *> \param[in] KTOP

 *> \verbatim

 *>          KTOP is integer scalar

 *> \endverbatim

 *>

 *> \param[in] KBOT

 *> \verbatim

 *>          KBOT is integer scalar

 *>             These are the first and last rows and columns of an

 *>             isolated diagonal block upon which the QR sweep is to be

 *>             applied. It is assumed without a check that

 *>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0

 *>             and

 *>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.

 *> \endverbatim

 *>

 *> \param[in] NSHFTS

 *> \verbatim

 *>          NSHFTS is integer scalar

 *>             NSHFTS gives the number of simultaneous shifts.  NSHFTS

 *>             must be positive and even.

 *> \endverbatim

 *>

 *> \param[in,out] SR

 *> \verbatim

 *>          SR is REAL array of size (NSHFTS)

 *> \endverbatim

 *>

 *> \param[in,out] SI

 *> \verbatim

 *>          SI is REAL array of size (NSHFTS)

 *>             SR contains the real parts and SI contains the imaginary

 *>             parts of the NSHFTS shifts of origin that define the

 *>             multi-shift QR sweep.  On output SR and SI may be

 *>             reordered.

 *> \endverbatim

 *>

 *> \param[in,out] H

 *> \verbatim

 *>          H is REAL array of size (LDH,N)

 *>             On input H contains a Hessenberg matrix.  On output a

 *>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied

 *>             to the isolated diagonal block in rows and columns KTOP

 *>             through KBOT.

 *> \endverbatim

 *>

 *> \param[in] LDH

 *> \verbatim

 *>          LDH is integer scalar

 *>             LDH is the leading dimension of H just as declared in the

 *>             calling procedure.  LDH.GE.MAX(1,N).

 *> \endverbatim

 *>

 *> \param[in] ILOZ

 *> \verbatim

 *>          ILOZ is INTEGER

 *> \endverbatim

 *>

 *> \param[in] IHIZ

 *> \verbatim

 *>          IHIZ is INTEGER

 *>             Specify the rows of Z to which transformations must be

 *>             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N

 *> \endverbatim

 *>

 *> \param[in,out] Z

 *> \verbatim

 *>          Z is REAL array of size (LDZ,IHIZ)

 *>             If WANTZ = .TRUE., then the QR Sweep orthogonal

 *>             similarity transformation is accumulated into

 *>             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.

 *>             If WANTZ = .FALSE., then Z is unreferenced.

 *> \endverbatim

 *>

 *> \param[in] LDZ

 *> \verbatim

 *>          LDZ is integer scalar

 *>             LDZ is the leading dimension of Z just as declared in

 *>             the calling procedure. LDZ.GE.N.

 *> \endverbatim

 *>

 *> \param[out] V

 *> \verbatim

 *>          V is REAL array of size (LDV,NSHFTS/2)

 *> \endverbatim

 *>

 *> \param[in] LDV

 *> \verbatim

 *>          LDV is integer scalar

 *>             LDV is the leading dimension of V as declared in the

 *>             calling procedure.  LDV.GE.3.

 *> \endverbatim

 *>

 *> \param[out] U

 *> \verbatim

 *>          U is REAL array of size

 *>             (LDU,3*NSHFTS-3)

 *> \endverbatim

 *>

 *> \param[in] LDU

 *> \verbatim

 *>          LDU is integer scalar

 *>             LDU is the leading dimension of U just as declared in the

 *>             calling subroutine.  LDU.GE.3*NSHFTS-3.

 *> \endverbatim

 *>

 *> \param[in] NH

 *> \verbatim

 *>          NH is integer scalar

 *>             NH is the number of columns in array WH available for

 *>             workspace. NH.GE.1.

 *> \endverbatim

 *>

 *> \param[out] WH

 *> \verbatim

 *>          WH is REAL array of size (LDWH,NH)

 *> \endverbatim

 *>

 *> \param[in] LDWH

 *> \verbatim

 *>          LDWH is integer scalar

 *>             Leading dimension of WH just as declared in the

 *>             calling procedure.  LDWH.GE.3*NSHFTS-3.

 *> \endverbatim

 *>

 *> \param[in] NV

 *> \verbatim

 *>          NV is integer scalar

 *>             NV is the number of rows in WV available for workspace.

 *>             NV.GE.1.

 *> \endverbatim

 *>

 *> \param[out] WV

 *> \verbatim

 *>          WV is REAL array of size

 *>             (LDWV,3*NSHFTS-3)

 *> \endverbatim

 *>

 *> \param[in] LDWV

 *> \verbatim

 *>          LDWV is integer scalar

 *>             LDWV is the leading dimension of WV as declared in the

 *>             calling subroutine.  LDWV.GE.NV.

 *> \endverbatim

 *

 *  Authors:

 *  ========

 *

 *> \author Univ. of Tennessee

 *> \author Univ. of California Berkeley

 *> \author Univ. of Colorado Denver

 *> \author NAG Ltd.

 *

 *> \date June 2016

 *

 *> \ingroup realOTHERauxiliary

 *

 *> \par Contributors:

 *  ==================

 *>

 *>       Karen Braman and Ralph Byers, Department of Mathematics,

 *>       University of Kansas, USA

 *

 *> \par References:

 *  ================

 *>

 *>       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR

 *>       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3

 *>       Performance, SIAM Journal on Matrix Analysis and Applications,

 *>       volume 23, pages 929--947, 2002.

 *>

 *  =====================================================================

       SUBROUTINE slaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,

      $                   sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u,

      $                   ldu, nv, wv, ldwv, nh, wh, ldwh )

 *

 *  -- LAPACK auxiliary routine (version 3.6.1) --

 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

 *     June 2016

 *

 *     .. Scalar Arguments ..

       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,

      $                   ldwh, ldwv, ldz, n, nh, nshfts, nv

       LOGICAL            WANTT, WANTZ

 *     ..

 *     .. Array Arguments ..

       REAL               H( ldh, * ), SI( * ), SR( * ), U( ldu, * ),

      $                   v( ldv, * ), wh( ldwh, * ), wv( ldwv, * ),

      $                   z( ldz, * )

 *     ..

 *

 *  ================================================================

 *     .. Parameters ..

       REAL               ZERO, ONE

       parameter                ( zero = 0.0e0, one = 1.0e0 )

 *     ..

 *     .. Local Scalars ..

       REAL               ALPHA, BETA, H11, H12, H21, H22, REFSUM,

      $                   safmax, safmin, scl, smlnum, swap, tst1, tst2,

      $                   ulp

       INTEGER            I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,

      $                   jrow, jtop, k, k1, kdu, kms, knz, krcol, kzs,

      $                   m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,

      $                   ns, nu

       LOGICAL            ACCUM, BLK22, BMP22

 *     ..

 *     .. External Functions ..

       REAL               SLAMCH

       EXTERNAL           slamch

 *     ..

 *     .. Intrinsic Functions ..

 *

       INTRINSIC          abs, max, min, mod, real

 *     ..

 *     .. Local Arrays ..

       REAL               VT( 3 )

 *     ..

 *     .. External Subroutines ..

       EXTERNAL           sgemm, slabad, slacpy, slaqr1, slarfg, slaset,

      $                   strmm

 *     ..

 *     .. Executable Statements ..

 *

 *     ==== If there are no shifts, then there is nothing to do. ====

 *

       IF( nshfts.LT.2 )

      $   RETURN

 *

 *     ==== If the active block is empty or 1-by-1, then there

 *     .    is nothing to do. ====

 *

       IF( ktop.GE.kbot )

      $   RETURN

 *

 *     ==== Shuffle shifts into pairs of real shifts and pairs

 *     .    of complex conjugate shifts assuming complex

 *     .    conjugate shifts are already adjacent to one

 *     .    another. ====

 *

       DO 10 i = 1, nshfts - 2, 2

          IF( si( i ).NE.-si( i+1 ) ) THEN

 *

             swap = sr( i )

             sr( i ) = sr( i+1 )

             sr( i+1 ) = sr( i+2 )

             sr( i+2 ) = swap

 *

             swap = si( i )

             si( i ) = si( i+1 )

             si( i+1 ) = si( i+2 )

             si( i+2 ) = swap

          END IF

    10 CONTINUE

 *

 *     ==== NSHFTS is supposed to be even, but if it is odd,

 *     .    then simply reduce it by one.  The shuffle above

 *     .    ensures that the dropped shift is real and that

 *     .    the remaining shifts are paired. ====

 *

       ns = nshfts - mod( nshfts, 2 )

 *

 *     ==== Machine constants for deflation ====

 *

       safmin = slamch( 'SAFE MINIMUM' )

       safmax = one / safmin

       CALL slabad( safmin, safmax )

       ulp = slamch( 'PRECISION' )

       smlnum = safmin*( REAL( N ) / ULP )

 *

 *     ==== Use accumulated reflections to update far-from-diagonal

 *     .    entries ? ====

 *

       accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )

 *

 *     ==== If so, exploit the 2-by-2 block structure? ====

 *

       blk22 = ( ns.GT.2 ) .AND. ( kacc22.EQ.2 )

 *

 *     ==== clear trash ====

 *

       IF( ktop+2.LE.kbot )

      $   h( ktop+2, ktop ) = zero

 *

 *     ==== NBMPS = number of 2-shift bulges in the chain ====

 *

       nbmps = ns / 2

 *

 *     ==== KDU = width of slab ====

 *

       kdu = 6*nbmps - 3

 *

 *     ==== Create and chase chains of NBMPS bulges ====

 *

       DO 220 incol = 3*( 1-nbmps ) + ktop - 1, kbot - 2, 3*nbmps - 2

          ndcol = incol + kdu

          IF( accum )

      $      CALL slaset( 'ALL', kdu, kdu, zero, one, u, ldu )

 *

 *        ==== Near-the-diagonal bulge chase.  The following loop

 *        .    performs the near-the-diagonal part of a small bulge

 *        .    multi-shift QR sweep.  Each 6*NBMPS-2 column diagonal

 *        .    chunk extends from column INCOL to column NDCOL

 *        .    (including both column INCOL and column NDCOL). The

 *        .    following loop chases a 3*NBMPS column long chain of

 *        .    NBMPS bulges 3*NBMPS-2 columns to the right.  (INCOL

 *        .    may be less than KTOP and NDCOL may be greater than

 *        .    KBOT indicating phantom columns from which to chase

 *        .    bulges before they are actually introduced or to which

 *        .    to chase bulges beyond column KBOT.)  ====

 *

          DO 150 krcol = incol, min( incol+3*nbmps-3, kbot-2 )

 *

 *           ==== Bulges number MTOP to MBOT are active double implicit

 *           .    shift bulges.  There may or may not also be small

 *           .    2-by-2 bulge, if there is room.  The inactive bulges

 *           .    (if any) must wait until the active bulges have moved

 *           .    down the diagonal to make room.  The phantom matrix

 *           .    paradigm described above helps keep track.  ====

 *

             mtop = max( 1, ( ( ktop-1 )-krcol+2 ) / 3+1 )

             mbot = min( nbmps, ( kbot-krcol ) / 3 )

             m22 = mbot + 1

             bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+3*( m22-1 ) ).EQ.

      $              ( kbot-2 )

 *

 *           ==== Generate reflections to chase the chain right

 *           .    one column.  (The minimum value of K is KTOP-1.) ====

 *

             DO 20 m = mtop, mbot

                k = krcol + 3*( m-1 )

                IF( k.EQ.ktop-1 ) THEN

                   CALL slaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),

      $                         si( 2*m-1 ), sr( 2*m ), si( 2*m ),

      $                         v( 1, m ) )

                   alpha = v( 1, m )

                   CALL slarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )

                ELSE

                   beta = h( k+1, k )

                   v( 2, m ) = h( k+2, k )

                   v( 3, m ) = h( k+3, k )

                   CALL slarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )

 *

 *                 ==== A Bulge may collapse because of vigilant

 *                 .    deflation or destructive underflow.  In the

 *                 .    underflow case, try the two-small-subdiagonals

 *                 .    trick to try to reinflate the bulge.  ====

 *

                   IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.

      $                zero .OR. h( k+3, k+2 ).EQ.zero ) THEN

 *

 *                    ==== Typical case: not collapsed (yet). ====

 *

                      h( k+1, k ) = beta

                      h( k+2, k ) = zero

                      h( k+3, k ) = zero

                   ELSE

 *

 *                    ==== Atypical case: collapsed.  Attempt to

 *                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).

 *                    .    If the fill resulting from the new

 *                    .    reflector is too large, then abandon it.

 *                    .    Otherwise, use the new one. ====

 *

                      CALL slaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),

      $                            si( 2*m-1 ), sr( 2*m ), si( 2*m ),

      $                            vt )

                      alpha = vt( 1 )

                      CALL slarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )

                      refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*

      $                        h( k+2, k ) )

 *

                      IF( abs( h( k+2, k )-refsum*vt( 2 ) )+

      $                   abs( refsum*vt( 3 ) ).GT.ulp*

      $                   ( abs( h( k, k ) )+abs( h( k+1,

      $                   k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN

 *

 *                       ==== Starting a new bulge here would

 *                       .    create non-negligible fill.  Use

 *                       .    the old one with trepidation. ====

 *

                         h( k+1, k ) = beta

                         h( k+2, k ) = zero

                         h( k+3, k ) = zero

                      ELSE

 *

 *                       ==== Starting a new bulge here would

 *                       .    create only negligible fill.

 *                       .    Replace the old reflector with

 *                       .    the new one. ====

 *

                         h( k+1, k ) = h( k+1, k ) - refsum

                         h( k+2, k ) = zero

                         h( k+3, k ) = zero

                         v( 1, m ) = vt( 1 )

                         v( 2, m ) = vt( 2 )

                         v( 3, m ) = vt( 3 )

                      END IF

                   END IF

                END IF

    20       CONTINUE

 *

 *           ==== Generate a 2-by-2 reflection, if needed. ====

 *

             k = krcol + 3*( m22-1 )

             IF( bmp22 ) THEN

                IF( k.EQ.ktop-1 ) THEN

                   CALL slaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),

      $                         si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),

      $                         v( 1, m22 ) )

                   beta = v( 1, m22 )

                   CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )

                ELSE

                   beta = h( k+1, k )

                   v( 2, m22 ) = h( k+2, k )

                   CALL slarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )

                   h( k+1, k ) = beta

                   h( k+2, k ) = zero

                END IF

             END IF

 *

 *           ==== Multiply H by reflections from the left ====

 *

             IF( accum ) THEN

                jbot = min( ndcol, kbot )

             ELSE IF( wantt ) THEN

                jbot = n

             ELSE

                jbot = kbot

             END IF

             DO 40 j = max( ktop, krcol ), jbot

                mend = min( mbot, ( j-krcol+2 ) / 3 )

                DO 30 m = mtop, mend

                   k = krcol + 3*( m-1 )

                   refsum = v( 1, m )*( h( k+1, j )+v( 2, m )*

      $                     h( k+2, j )+v( 3, m )*h( k+3, j ) )

                   h( k+1, j ) = h( k+1, j ) - refsum

                   h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m )

                   h( k+3, j ) = h( k+3, j ) - refsum*v( 3, m )

    30          CONTINUE

    40       CONTINUE

             IF( bmp22 ) THEN

                k = krcol + 3*( m22-1 )

                DO 50 j = max( k+1, ktop ), jbot

                   refsum = v( 1, m22 )*( h( k+1, j )+v( 2, m22 )*

      $                     h( k+2, j ) )

                   h( k+1, j ) = h( k+1, j ) - refsum

                   h( k+2, j ) = h( k+2, j ) - refsum*v( 2, m22 )

    50          CONTINUE

             END IF

 *

 *           ==== Multiply H by reflections from the right.

 *           .    Delay filling in the last row until the

 *           .    vigilant deflation check is complete. ====

 *

             IF( accum ) THEN

                jtop = max( ktop, incol )

             ELSE IF( wantt ) THEN

                jtop = 1

             ELSE

                jtop = ktop

             END IF

             DO 90 m = mtop, mbot

                IF( v( 1, m ).NE.zero ) THEN

                   k = krcol + 3*( m-1 )

                   DO 60 j = jtop, min( kbot, k+3 )

                      refsum = v( 1, m )*( h( j, k+1 )+v( 2, m )*

      $                        h( j, k+2 )+v( 3, m )*h( j, k+3 ) )

                      h( j, k+1 ) = h( j, k+1 ) - refsum

                      h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m )

                      h( j, k+3 ) = h( j, k+3 ) - refsum*v( 3, m )

    60             CONTINUE

 *

                   IF( accum ) THEN

 *

 *                    ==== Accumulate U. (If necessary, update Z later

 *                    .    with an efficient matrix-matrix

 *                    .    multiply.) ====

 *

                      kms = k - incol

                      DO 70 j = max( 1, ktop-incol ), kdu

                         refsum = v( 1, m )*( u( j, kms+1 )+v( 2, m )*

      $                           u( j, kms+2 )+v( 3, m )*u( j, kms+3 ) )

                         u( j, kms+1 ) = u( j, kms+1 ) - refsum

                         u( j, kms+2 ) = u( j, kms+2 ) - refsum*v( 2, m )

                         u( j, kms+3 ) = u( j, kms+3 ) - refsum*v( 3, m )

    70                CONTINUE

                   ELSE IF( wantz ) THEN

 *

 *                    ==== U is not accumulated, so update Z

 *                    .    now by multiplying by reflections

 *                    .    from the right. ====

 *

                      DO 80 j = iloz, ihiz

                         refsum = v( 1, m )*( z( j, k+1 )+v( 2, m )*

      $                           z( j, k+2 )+v( 3, m )*z( j, k+3 ) )

                         z( j, k+1 ) = z( j, k+1 ) - refsum

                         z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m )

                         z( j, k+3 ) = z( j, k+3 ) - refsum*v( 3, m )

    80                CONTINUE

                   END IF

                END IF

    90       CONTINUE

 *

 *           ==== Special case: 2-by-2 reflection (if needed) ====

 *

             k = krcol + 3*( m22-1 )

             IF( bmp22 ) THEN

                IF ( v( 1, m22 ).NE.zero ) THEN

                   DO 100 j = jtop, min( kbot, k+3 )

                      refsum = v( 1, m22 )*( h( j, k+1 )+v( 2, m22 )*

      $                        h( j, k+2 ) )

                      h( j, k+1 ) = h( j, k+1 ) - refsum

                      h( j, k+2 ) = h( j, k+2 ) - refsum*v( 2, m22 )

   100             CONTINUE

 *

                   IF( accum ) THEN

                      kms = k - incol

                      DO 110 j = max( 1, ktop-incol ), kdu

                         refsum = v( 1, m22 )*( u( j, kms+1 )+

      $                           v( 2, m22 )*u( j, kms+2 ) )

                         u( j, kms+1 ) = u( j, kms+1 ) - refsum

                         u( j, kms+2 ) = u( j, kms+2 ) - refsum*

      $                                  v( 2, m22 )

   110                CONTINUE

                   ELSE IF( wantz ) THEN

                      DO 120 j = iloz, ihiz

                         refsum = v( 1, m22 )*( z( j, k+1 )+v( 2, m22 )*

      $                           z( j, k+2 ) )

                         z( j, k+1 ) = z( j, k+1 ) - refsum

                         z( j, k+2 ) = z( j, k+2 ) - refsum*v( 2, m22 )

   120                CONTINUE

                   END IF

                END IF

             END IF

 *

 *           ==== Vigilant deflation check ====

 *

             mstart = mtop

             IF( krcol+3*( mstart-1 ).LT.ktop )

      $         mstart = mstart + 1

             mend = mbot

             IF( bmp22 )

      $         mend = mend + 1

             IF( krcol.EQ.kbot-2 )

      $         mend = mend + 1

             DO 130 m = mstart, mend

                k = min( kbot-1, krcol+3*( m-1 ) )

 *

 *              ==== The following convergence test requires that

 *              .    the traditional small-compared-to-nearby-diagonals

 *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)

 *              .    criteria both be satisfied.  The latter improves

 *              .    accuracy in some examples. Falling back on an

 *              .    alternate convergence criterion when TST1 or TST2

 *              .    is zero (as done here) is traditional but probably

 *              .    unnecessary. ====

 *

                IF( h( k+1, k ).NE.zero ) THEN

                   tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )

                   IF( tst1.EQ.zero ) THEN

                      IF( k.GE.ktop+1 )

      $                  tst1 = tst1 + abs( h( k, k-1 ) )

                      IF( k.GE.ktop+2 )

      $                  tst1 = tst1 + abs( h( k, k-2 ) )

                      IF( k.GE.ktop+3 )

      $                  tst1 = tst1 + abs( h( k, k-3 ) )

                      IF( k.LE.kbot-2 )

      $                  tst1 = tst1 + abs( h( k+2, k+1 ) )

                      IF( k.LE.kbot-3 )

      $                  tst1 = tst1 + abs( h( k+3, k+1 ) )

                      IF( k.LE.kbot-4 )

      $                  tst1 = tst1 + abs( h( k+4, k+1 ) )

                   END IF

                   IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )

      $                 THEN

                      h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )

                      h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )

                      h11 = max( abs( h( k+1, k+1 ) ),

      $                     abs( h( k, k )-h( k+1, k+1 ) ) )

                      h22 = min( abs( h( k+1, k+1 ) ),

      $                     abs( h( k, k )-h( k+1, k+1 ) ) )

                      scl = h11 + h12

                      tst2 = h22*( h11 / scl )

 *

                      IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.

      $                   max( smlnum, ulp*tst2 ) )h( k+1, k ) = zero

                   END IF

                END IF

   130       CONTINUE

 *

 *           ==== Fill in the last row of each bulge. ====

 *

             mend = min( nbmps, ( kbot-krcol-1 ) / 3 )

             DO 140 m = mtop, mend

                k = krcol + 3*( m-1 )

                refsum = v( 1, m )*v( 3, m )*h( k+4, k+3 )

                h( k+4, k+1 ) = -refsum

                h( k+4, k+2 ) = -refsum*v( 2, m )

                h( k+4, k+3 ) = h( k+4, k+3 ) - refsum*v( 3, m )

   140       CONTINUE

 *

 *           ==== End of near-the-diagonal bulge chase. ====

 *

   150    CONTINUE

 *

 *        ==== Use U (if accumulated) to update far-from-diagonal

 *        .    entries in H.  If required, use U to update Z as

 *        .    well. ====

 *

          IF( accum ) THEN

             IF( wantt ) THEN

                jtop = 1

                jbot = n

             ELSE

                jtop = ktop

                jbot = kbot

             END IF

             IF( ( .NOT.blk22 ) .OR. ( incol.LT.ktop ) .OR.

      $          ( ndcol.GT.kbot ) .OR. ( ns.LE.2 ) ) THEN

 *

 *              ==== Updates not exploiting the 2-by-2 block

 *              .    structure of U.  K1 and NU keep track of

 *              .    the location and size of U in the special

 *              .    cases of introducing bulges and chasing

 *              .    bulges off the bottom.  In these special

 *              .    cases and in case the number of shifts

 *              .    is NS = 2, there is no 2-by-2 block

 *              .    structure to exploit.  ====

 *

                k1 = max( 1, ktop-incol )

                nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1

 *

 *              ==== Horizontal Multiply ====

 *

                DO 160 jcol = min( ndcol, kbot ) + 1, jbot, nh

                   jlen = min( nh, jbot-jcol+1 )

                   CALL sgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),

      $                        ldu, h( incol+k1, jcol ), ldh, zero, wh,

      $                        ldwh )

                   CALL slacpy( 'ALL', nu, jlen, wh, ldwh,

      $                         h( incol+k1, jcol ), ldh )

   160          CONTINUE

 *

 *              ==== Vertical multiply ====

 *

                DO 170 jrow = jtop, max( ktop, incol ) - 1, nv

                   jlen = min( nv, max( ktop, incol )-jrow )

                   CALL sgemm( 'N', 'N', jlen, nu, nu, one,

      $                        h( jrow, incol+k1 ), ldh, u( k1, k1 ),

      $                        ldu, zero, wv, ldwv )

                   CALL slacpy( 'ALL', jlen, nu, wv, ldwv,

      $                         h( jrow, incol+k1 ), ldh )

   170          CONTINUE

 *

 *              ==== Z multiply (also vertical) ====

 *

                IF( wantz ) THEN

                   DO 180 jrow = iloz, ihiz, nv

                      jlen = min( nv, ihiz-jrow+1 )

                      CALL sgemm( 'N', 'N', jlen, nu, nu, one,

      $                           z( jrow, incol+k1 ), ldz, u( k1, k1 ),

      $                           ldu, zero, wv, ldwv )

                      CALL slacpy( 'ALL', jlen, nu, wv, ldwv,

      $                            z( jrow, incol+k1 ), ldz )

   180             CONTINUE

                END IF

             ELSE

 *

 *              ==== Updates exploiting U's 2-by-2 block structure.

 *              .    (I2, I4, J2, J4 are the last rows and columns

 *              .    of the blocks.) ====

 *

                i2 = ( kdu+1 ) / 2

                i4 = kdu

                j2 = i4 - i2

                j4 = kdu

 *

 *              ==== KZS and KNZ deal with the band of zeros

 *              .    along the diagonal of one of the triangular

 *              .    blocks. ====

 *

                kzs = ( j4-j2 ) - ( ns+1 )

                knz = ns + 1

 *

 *              ==== Horizontal multiply ====

 *

                DO 190 jcol = min( ndcol, kbot ) + 1, jbot, nh

                   jlen = min( nh, jbot-jcol+1 )

 *

 *                 ==== Copy bottom of H to top+KZS of scratch ====

 *                  (The first KZS rows get multiplied by zero.) ====

 *

                   CALL slacpy( 'ALL', knz, jlen, h( incol+1+j2, jcol ),

      $                         ldh, wh( kzs+1, 1 ), ldwh )

 *

 *                 ==== Multiply by U21**T ====

 *

                   CALL slaset( 'ALL', kzs, jlen, zero, zero, wh, ldwh )

                   CALL strmm( 'L', 'U', 'C', 'N', knz, jlen, one,

      $                        u( j2+1, 1+kzs ), ldu, wh( kzs+1, 1 ),

      $                        ldwh )

 *

 *                 ==== Multiply top of H by U11**T ====

 *

                   CALL sgemm( 'C', 'N', i2, jlen, j2, one, u, ldu,

      $                        h( incol+1, jcol ), ldh, one, wh, ldwh )

 *

 *                 ==== Copy top of H to bottom of WH ====

 *

                   CALL slacpy( 'ALL', j2, jlen, h( incol+1, jcol ), ldh,

      $                         wh( i2+1, 1 ), ldwh )

 *

 *                 ==== Multiply by U12**T ====

 *

                   CALL strmm( 'L', 'L', 'C', 'N', j2, jlen, one,

      $                        u( 1, i2+1 ), ldu, wh( i2+1, 1 ), ldwh )

 *

 *                 ==== Multiply by U22 ====

 *

                   CALL sgemm( 'C', 'N', i4-i2, jlen, j4-j2, one,

      $                        u( j2+1, i2+1 ), ldu,

      $                        h( incol+1+j2, jcol ), ldh, one,

      $                        wh( i2+1, 1 ), ldwh )

 *

 *                 ==== Copy it back ====

 *

                   CALL slacpy( 'ALL', kdu, jlen, wh, ldwh,

      $                         h( incol+1, jcol ), ldh )

   190          CONTINUE

 *

 *              ==== Vertical multiply ====

 *

                DO 200 jrow = jtop, max( incol, ktop ) - 1, nv

                   jlen = min( nv, max( incol, ktop )-jrow )

 *

 *                 ==== Copy right of H to scratch (the first KZS

 *                 .    columns get multiplied by zero) ====

 *

                   CALL slacpy( 'ALL', jlen, knz, h( jrow, incol+1+j2 ),

      $                         ldh, wv( 1, 1+kzs ), ldwv )

 *

 *                 ==== Multiply by U21 ====

 *

                   CALL slaset( 'ALL', jlen, kzs, zero, zero, wv, ldwv )

                   CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,

      $                        u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),

      $                        ldwv )

 *

 *                 ==== Multiply by U11 ====

 *

                   CALL sgemm( 'N', 'N', jlen, i2, j2, one,

      $                        h( jrow, incol+1 ), ldh, u, ldu, one, wv,

      $                        ldwv )

 *

 *                 ==== Copy left of H to right of scratch ====

 *

                   CALL slacpy( 'ALL', jlen, j2, h( jrow, incol+1 ), ldh,

      $                         wv( 1, 1+i2 ), ldwv )

 *

 *                 ==== Multiply by U12 ====

 *

                   CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,

      $                        u( 1, i2+1 ), ldu, wv( 1, 1+i2 ), ldwv )

 *

 *                 ==== Multiply by U22 ====

 *

                   CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,

      $                        h( jrow, incol+1+j2 ), ldh,

      $                        u( j2+1, i2+1 ), ldu, one, wv( 1, 1+i2 ),

      $                        ldwv )

 *

 *                 ==== Copy it back ====

 *

                   CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,

      $                         h( jrow, incol+1 ), ldh )

   200          CONTINUE

 *

 *              ==== Multiply Z (also vertical) ====

 *

                IF( wantz ) THEN

                   DO 210 jrow = iloz, ihiz, nv

                      jlen = min( nv, ihiz-jrow+1 )

 *

 *                    ==== Copy right of Z to left of scratch (first

 *                    .     KZS columns get multiplied by zero) ====

 *

                      CALL slacpy( 'ALL', jlen, knz,

      $                            z( jrow, incol+1+j2 ), ldz,

      $                            wv( 1, 1+kzs ), ldwv )

 *

 *                    ==== Multiply by U21 ====

 *

                      CALL slaset( 'ALL', jlen, kzs, zero, zero, wv,

      $                            ldwv )

                      CALL strmm( 'R', 'U', 'N', 'N', jlen, knz, one,

      $                           u( j2+1, 1+kzs ), ldu, wv( 1, 1+kzs ),

      $                           ldwv )

 *

 *                    ==== Multiply by U11 ====

 *

                      CALL sgemm( 'N', 'N', jlen, i2, j2, one,

      $                           z( jrow, incol+1 ), ldz, u, ldu, one,

      $                           wv, ldwv )

 *

 *                    ==== Copy left of Z to right of scratch ====

 *

                      CALL slacpy( 'ALL', jlen, j2, z( jrow, incol+1 ),

      $                            ldz, wv( 1, 1+i2 ), ldwv )

 *

 *                    ==== Multiply by U12 ====

 *

                      CALL strmm( 'R', 'L', 'N', 'N', jlen, i4-i2, one,

      $                           u( 1, i2+1 ), ldu, wv( 1, 1+i2 ),

      $                           ldwv )

 *

 *                    ==== Multiply by U22 ====

 *

                      CALL sgemm( 'N', 'N', jlen, i4-i2, j4-j2, one,

      $                           z( jrow, incol+1+j2 ), ldz,

      $                           u( j2+1, i2+1 ), ldu, one,

      $                           wv( 1, 1+i2 ), ldwv )

 *

 *                    ==== Copy the result back to Z ====

 *

                      CALL slacpy( 'ALL', jlen, kdu, wv, ldwv,

      $                            z( jrow, incol+1 ), ldz )

   210             CONTINUE

                END IF

             END IF

          END IF

   220 CONTINUE

 *

 *     ==== End of SLAQR5 ====

 *

       END

slarfg
subroutine slarfg(N, ALPHA, X, INCX, TAU)
SLARFG generates an elementary reflector (Householder matrix).
Definition: slarfg.f:108

slabad
subroutine slabad(SMALL, LARGE)
SLABAD
Definition: slabad.f:76

slaqr1
subroutine slaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
SLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and specified shifts.
Definition: slaqr1.f:123

sgemm
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189

slacpy
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:105

slaset
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: slaset.f:112

strmm
subroutine strmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
STRMM
Definition: strmm.f:179

slaqr5
subroutine slaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
SLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: slaqr5.f:261