d6/d6b/pdlaqr5_8f_source.html

      SUBROUTINE pdlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,

     $                    SR, SI, H, DESCH, ILOZ, IHIZ, Z, DESCZ, WORK,

     $                    LWORK, IWORK, LIWORK )

*

*     Contribution from the Department of Computing Science and HPC2N,

*     Umea University, Sweden

*

*  -- ScaLAPACK routine (version 2.0.2) --

*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver

*     May 1 2012

*

      IMPLICIT NONE

*

*     .. Scalar Arguments ..

      INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, N, NSHFTS,

     $                   LWORK, LIWORK

      LOGICAL            WANTT, WANTZ

*     ..

*     .. Array Arguments ..

      INTEGER            DESCH( * ), DESCZ( * ), IWORK( * )

      DOUBLE PRECISION   H( * ), SI( * ), SR( * ), Z( * ), WORK( * )

*     ..

*

*  Purpose

*  =======

*

*  This auxiliary subroutine called by PDLAQR0 performs a

*  single small-bulge multi-shift QR sweep by chasing separated

*  groups of bulges along the main block diagonal of H.

*

*   WANTT  (global input) logical scalar

*          WANTT = .TRUE. if the quasi-triangular Schur factor

*          is being computed.  WANTT is set to .FALSE. otherwise.

*

*   WANTZ  (global input) logical scalar

*          WANTZ = .TRUE. if the orthogonal Schur factor is being

*          computed.  WANTZ is set to .FALSE. otherwise.

*

*   KACC22 (global input) integer with value 0, 1, or 2.

*          Specifies the computation mode of far-from-diagonal

*          orthogonal updates.

*     = 1: PDLAQR5 accumulates reflections and uses matrix-matrix

*          multiply to update the far-from-diagonal matrix entries.

*     = 2: PDLAQR5 accumulates reflections, uses matrix-matrix

*          multiply to update the far-from-diagonal matrix entries,

*          and takes advantage of 2-by-2 block structure during

*          matrix multiplies.

*

*   N      (global input) integer scalar

*          N is the order of the Hessenberg matrix H upon which this

*          subroutine operates.

*

*   KTOP   (global input) integer scalar

*   KBOT   (global input) integer scalar

*          These are the first and last rows and columns of an

*          isolated diagonal block upon which the QR sweep is to be

*          applied. It is assumed without a check that

*                    either KTOP = 1  or   H(KTOP,KTOP-1) = 0

*          and

*                    either KBOT = N  or   H(KBOT+1,KBOT) = 0.

*

*   NSHFTS (global input) integer scalar

*          NSHFTS gives the number of simultaneous shifts.  NSHFTS

*          must be positive and even.

*

*   SR     (global input/output) DOUBLE PRECISION array of size (NSHFTS)

*   SI     (global input/output) DOUBLE PRECISION array of size (NSHFTS)

*          SR contains the real parts and SI contains the imaginary

*          parts of the NSHFTS shifts of origin that define the

*          multi-shift QR sweep.

*

*   H      (local input/output) DOUBLE PRECISION array of size

*          (DESCH(LLD_),*)

*          On input H contains a Hessenberg matrix.  On output a

*          multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied

*          to the isolated diagonal block in rows and columns KTOP

*          through KBOT.

*

*   DESCH  (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix H.

*

*   ILOZ   (global input) INTEGER

*   IHIZ   (global input) INTEGER

*          Specify the rows of Z to which transformations must be

*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N

*

*   Z      (local input/output) DOUBLE PRECISION array of size

*          (DESCZ(LLD_),*)

*          If WANTZ = .TRUE., then the QR Sweep orthogonal

*          similarity transformation is accumulated into

*          Z(ILOZ:IHIZ,ILO:IHI) from the right.

*          If WANTZ = .FALSE., then Z is unreferenced.

*

*   DESCZ  (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix Z.

*

*   WORK   (local workspace) DOUBLE PRECISION array, dimension (LWORK)

*

*   LWORK  (local input) INTEGER

*          The length of the workspace array WORK.

*

*   IWORK  (local workspace) INTEGER array, dimension (LIWORK)

*

*   LIWORK (local input) INTEGER

*          The length of the workspace array IWORK.

*

*     ================================================================

*     Based on contributions by

*        Robert Granat, Department of Computing Science and HPC2N,

*        University of Umea, Sweden.

*

*     ============================================================

*     References:

*       K. Braman, R. Byers, and R. Mathias,

*       The Multi-Shift QR Algorithm Part I: Maintaining Well Focused

*       Shifts, and Level 3 Performance.

*       SIAM J. Matrix Anal. Appl., 23(4):929--947, 2002.

*

*       R. Granat, B. Kagstrom, and D. Kressner,

*       A Novel Parallel QR Algorithm for Hybrid Distributed Memory HPC

*       Systems.

*       SIAM J. Sci. Comput., 32(4):2345--2378, 2010.

*

*     ============================================================

*     .. Parameters ..

      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,

     $                   LLD_, MB_, M_, NB_, N_, RSRC_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      DOUBLE PRECISION   ZERO, ONE

      PARAMETER          ( ZERO = 0.0d0, one = 1.0d0 )

      INTEGER            NTINY

      parameter( ntiny = 11 )

*     ..

*     .. Local Scalars ..

      DOUBLE PRECISION   ALPHA, BETA, H11, H12, H21, H22, REFSUM,

     $                   SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,

     $                   ulp, tau, elem, stamp, ddum, orth

      INTEGER            I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,

     $                   JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,

     $                   m, m22, mbot, mend, mstart, mtop, nbmps, ndcol,

     $                   ns, nu, lldh, lldz, lldu, lldv, lldw, lldwh,

     $                   info, ictxt, nprow, npcol, nb, iroffh, itop,

     $                   nwin, myrow, mycol, lns, numwin, lkacc22,

     $                   lchain, win, idonejob, ipnext, anmwin, lenrbuf,

     $                   lencbuf, ichoff, lrsrc, lcsrc, lktop, lkbot,

     $                   ii, jj, swin, ewin, lnwin, dim, llktop, llkbot,

     $                   ipv, ipu, iph, ipw, ku, kwh, kwv, nve, lks,

     $                   idum, nho, dir, winid, indx, iloc, jloc, rsrc1,

     $                   csrc1, rsrc2, csrc2, rsrc3, csrc3, rsrc4, ipuu,

     $                   csrc4, lrows, lcols, indxs, ks, jloc1, iloc1,

     $                   lktop1, lktop2, wchunk, numchunk, oddeven,

     $                   chunknum, dim1, dim4, ipw3, hrows, zrows,

     $                   hcols, ipw1, ipw2, rsrc, east, jloc4, iloc4,

     $                   west, csrc, south, norht, indxe, north,

     $                   ihh, ipiw, lkbot1, nprocs, liroffh,

     $                   winfin, rws3, cls3, indx2, hrows2,

     $                   zrows2, hcols2, mnrbuf,

     $                   mxrbuf, mncbuf, mxcbuf, lwkopt

      LOGICAL            BLK22, BMP22, INTRO, DONEJOB, ODDNPROW,

     $                   ODDNPCOL, LQUERY, BCDONE

      CHARACTER          JBCMPZ*2, JOB

*     ..

*     .. External Functions ..

      LOGICAL            LSAME

      INTEGER            PILAENVX, ICEIL, INDXG2P, INDXG2L, NUMROC

      DOUBLE PRECISION   DLAMCH, DLANGE

      EXTERNAL           dlamch, pilaenvx, iceil, indxg2p, indxg2l,

     $                   numroc, lsame, dlange

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, dble, max, min, mod

*     ..

*     .. Local Arrays ..

      DOUBLE PRECISION   VT( 3 )

*     ..

*     .. External Subroutines ..

      EXTERNAL           dgemm, dlabad, dlamov, dlaqr1, dlarfg, dlaset,

     $                   dtrmm, dlaqr6

*     ..

*     .. Executable Statements ..

*

      info = 0

      ictxt = desch( ctxt_ )

      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )

      nprocs = nprow*npcol

      lldh = desch( lld_ )

      lldz = descz( lld_ )

      nb = desch( mb_ )

      iroffh = mod( ktop - 1, nb )

      lquery = lwork.EQ.-1 .OR. liwork.EQ.-1

*

*     If there are no shifts, then there is nothing to do.

*

      IF( .NOT. lquery .AND. nshfts.LT.2 )

     $   RETURN

*

*     If the active block is empty or 1-by-1, then there

*     is nothing to do.

*

      IF( .NOT. lquery .AND. ktop.GE.kbot )

     $   RETURN

*

*     Shuffle shifts into pairs of real shifts and pairs of

*     complex conjugate shifts assuming complex conjugate

*     shifts are already adjacent to one another.

*

      IF( .NOT. lquery ) THEN

         DO 10 i = 1, nshfts - 2, 2

            IF( si( i ).NE.-si( i+1 ) ) THEN

*

               swap = sr( i )

               sr( i ) = sr( i+1 )

               sr( i+1 ) = sr( i+2 )

               sr( i+2 ) = swap

*

               swap = si( i )

               si( i ) = si( i+1 )

               si( i+1 ) = si( i+2 )

               si( i+2 ) = swap

            END IF

   10    CONTINUE

      END IF

*

*     NSHFTS is supposed to be even, but if it is odd,

*     then simply reduce it by one.  The shuffle above

*     ensures that the dropped shift is real and that

*     the remaining shifts are paired.

*

      ns = nshfts - mod( nshfts, 2 )

*

*     Extract the size of the computational window.

*

      nwin = pilaenvx( ictxt, 19, 'PDLAQR5', jbcmpz, n, nb, nb, nb )

      nwin = min( nwin, kbot-ktop+1 )

*

*     Adjust number of simultaneous shifts if it exceeds the limit

*     set by the number of diagonal blocks in the active submatrix

*     H(KTOP:KBOT,KTOP:KBOT).

*

      ns = max( 2, min( ns, iceil( kbot-ktop+1, nb )*nwin/3 ) )

      ns = ns - mod( ns, 2 )


*

*     Decide the number of simultaneous computational windows

*     from the number of shifts - each window should contain up to

*     (NWIN / 3) shifts. Also compute the number of shifts per

*     window and make sure that number is even.

*

      lns = min( max( 2, nwin / 3 ), max( 2, ns / min(nprow,npcol) ) )

      lns = lns - mod( lns, 2 )

      numwin = max( 1, min( iceil( ns, lns ),

     $     iceil( kbot-ktop+1, nb ) - 1 ) )

      IF( nprow.NE.npcol ) THEN

         numwin = min( numwin, min(nprow,npcol) )

         lns = min( lns, max( 2, ns / min(nprow,npcol) ) )

         lns = lns - mod( lns, 2 )

      END IF

*

*     Machine constants for deflation.

*

      safmin = dlamch( 'SAFE MINIMUM' )

      safmax = one / safmin

      CALL dlabad( safmin, safmax )

      ulp = dlamch( 'PRECISION' )

      smlnum = safmin*( dble( n ) / ulp )

*

*     Use accumulated reflections to update far-from-diagonal

*     entries on a local level?

*

      IF( lns.LT.14 ) THEN

         lkacc22 = 1

      ELSE

         lkacc22 = 2

      END IF

*

*     If so, exploit the 2-by-2 block structure?

*     ( Usually it is not efficient to exploit the 2-by-2 structure

*       because the block size is too small. )

*

      blk22 = ( lns.GT.2 ) .AND. ( kacc22.EQ.2 )

*

*     Clear trash.

*

      IF( .NOT. lquery .AND. ktop+2.LE.kbot )

     $   CALL pdelset( h, ktop+2, ktop, desch, zero )

*

*     NBMPS = number of 2-shift bulges in each chain

*

      nbmps = lns / 2

*

*     KDU = width of slab

*

      kdu = 6*nbmps - 3

*

*     LCHAIN = length of each chain

*

      lchain = 3 * nbmps + 1

*

*     Check if workspace query.

*

      IF( lquery ) THEN

         hrows = numroc( n, nb, myrow, desch(rsrc_), nprow )

         hcols = numroc( n, nb, mycol, desch(csrc_), npcol )

         lwkopt = (5+2*numwin)*nb**2 + 2*hrows*nb + hcols*nb +

     $        max( hrows*nb, hcols*nb )

         work(1)  = dble(lwkopt)

         iwork(1) = 5*numwin

         RETURN

      END IF

*

*     Check if KTOP and KBOT are valid.

*

      IF( ktop.LT.1 .OR. kbot.GT.n ) RETURN

*

*     Create and chase NUMWIN chains of NBMPS bulges.

*

*     Set up window introduction.

*

      anmwin = 0

      intro = .true.

      ipiw = 1

*

*     Main loop:

*     While-loop over the computational windows which is

*     terminated when all windows have been introduced,

*     chased down to the bottom of the considered submatrix

*     and chased off.

*

 20   CONTINUE

*

*     Set up next window as long as we have less than the prescribed

*     number of windows. Each window is described by five integers:

*     1. Local value of KTOP (below denoted by LKTOP)

*     2. Local value of KBOT (below denoted by LKBOT)

*     3-4. Processor indices (LRSRC,LCSRC) associated with the window.

*     (5. Mark that decides if a window is fully processed or not)

*

*     Notice - the next window is only introduced if the first block

*     in the active submatrix does not contain any other windows.

*

      IF( anmwin.GT.0 ) THEN

         lktop = iwork( 1+(anmwin-1)*5 )

      ELSE

         lktop = ktop

      END IF

      IF( intro .AND. (anmwin.EQ.0 .OR. lktop.GT.iceil(ktop,nb)*nb) )

     $     THEN

         anmwin = anmwin + 1

*

*        Structure of IWORK:

*        IWORK( 1+(WIN-1)*5 ): start position

*        IWORK( 2+(WIN-1)*5 ): stop position

*        IWORK( 3+(WIN-1)*5 ): processor row id

*        IWORK( 4+(WIN-1)*5 ): processor col id

*        IWORK( 5+(WIN-1)*5 ): window status (0, 1, or 2)

*

         iwork( 1+(anmwin-1)*5 ) = ktop

         iwork( 2+(anmwin-1)*5 ) = ktop +

     $                             min( nwin,nb-iroffh,kbot-ktop+1 ) - 1

         iwork( 3+(anmwin-1)*5 ) = indxg2p( iwork(1+(anmwin-1)*5), nb,

     $                             myrow, desch(rsrc_), nprow )

         iwork( 4+(anmwin-1)*5 ) = indxg2p( iwork(2+(anmwin-1)*5), nb,

     $                             mycol, desch(csrc_), npcol )

         iwork( 5+(anmwin-1)*5 ) = 0

         ipiw = 6+(anmwin-1)*5

         IF( anmwin.EQ.numwin ) intro = .false.

      END IF

*

*     Do-loop over the number of windows.

*

      ipnext = 1

      donejob = .false.

      idonejob = 0

      lenrbuf = 0

      lencbuf = 0

      ichoff = 0

      DO 40 win = 1, anmwin

*

*        Extract window information to simplify the rest.

*

         lrsrc = iwork( 3+(win-1)*5 )

         lcsrc = iwork( 4+(win-1)*5 )

         lktop = iwork( 1+(win-1)*5 )

         lkbot = iwork( 2+(win-1)*5 )

         lnwin = lkbot - lktop + 1

*

*        Check if anything to do for current window, i.e., if the local

*        chain of bulges has reached the next block border etc.

*

         IF( iwork(5+(win-1)*5).LT.2 .AND. lnwin.GT.1 .AND.

     $        (lnwin.GT.lchain .OR. lkbot.EQ.kbot ) ) THEN

            liroffh = mod(lktop-1,nb)

            swin = lktop-liroffh

            ewin = min(kbot,lktop-liroffh+nb-1)

            dim = ewin-swin+1

            IF( dim.LE.ntiny .AND. .NOT.lkbot.EQ.kbot ) THEN

               iwork( 5+(win-1)*5 ) = 2

               GO TO 45

            END IF

            idonejob = 1

            IF( iwork(5+(win-1)*5).EQ.0 ) THEN

               iwork(5+(win-1)*5) = 1

            END IF

*

*           Let the process that owns the corresponding window do the

*           local bulge chase.

*

            IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN

*

*              Set the kind of job to do in DLAQR6:

*              1. JOB = 'I': Introduce and chase bulges in window WIN

*              2. JOB = 'C': Chase bulges from top to bottom of window WIN

*              3. JOB = 'O': Chase bulges off window WIN

*              4. JOB = 'A': All of 1-3 above is done - this will for

*                            example happen for very small active

*                            submatrices (like 2-by-2)

*

               llkbot = llktop + lnwin - 1

               IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN

                  job = 'All steps'

                  ichoff = 1

               ELSEIF( lktop.EQ.ktop ) THEN

                  job = 'Introduce and chase'

               ELSEIF( lkbot.EQ.kbot ) THEN

                  job = 'Off-chase bulges'

                  ichoff = 1

               ELSE

                  job = 'Chase bulges'

               END IF

*

*              Copy submatrix of H corresponding to window WIN into

*              workspace and set out additional workspace for storing

*              orthogonal transformations. This submatrix must be at

*              least (NTINY+1)-by-(NTINY+1) to fit into DLAQR6 - if not,

*              abort and go for cross border bulge chasing with this

*              particular window.

*

               ii = indxg2l( swin, nb, myrow, desch(rsrc_), nprow )

               jj = indxg2l( swin, nb, mycol, desch(csrc_), npcol )

               llktop = 1 + liroffh

               llkbot = llktop + lnwin - 1

*

               ipu = ipnext

               iph = ipu + lnwin**2

               ipuu = iph + max(ntiny+1,dim)**2

               ipv = ipuu + max(ntiny+1,dim)**2

               ipnext = iph

*

               IF( lsame( job, 'A' ) .OR. lsame( job, 'O' ) .AND.

     $              dim.LT.ntiny+1 ) THEN

                  CALL dlaset( 'All', ntiny+1, ntiny+1, zero, one,

     $                 work(iph), ntiny+1 )

               END IF

               CALL dlamov( 'Upper', dim, dim, h(ii+(jj-1)*lldh), lldh,

     $              work(iph), max(ntiny+1,dim) )

               CALL dcopy(  dim-1, h(ii+(jj-1)*lldh+1), lldh+1,

     $              work(iph+1), max(ntiny+1,dim)+1 )

               IF( lsame( job, 'C' ) .OR. lsame( job, 'O') ) THEN

                  CALL dcopy(  dim-2, h(ii+(jj-1)*lldh+2), lldh+1,

     $                 work(iph+2), max(ntiny+1,dim)+1 )

                  CALL dcopy(  dim-3, h(ii+(jj-1)*lldh+3), lldh+1,

     $                 work(iph+3), max(ntiny+1,dim)+1 )

                  CALL dlaset( 'Lower', dim-4, dim-4, zero,

     $                 zero, work(iph+4), max(ntiny+1,dim) )

               ELSE

                  CALL dlaset( 'Lower', dim-2, dim-2, zero,

     $                 zero, work(iph+2), max(ntiny+1,dim) )

               END IF

*

               ku = max(ntiny+1,dim) - kdu + 1

               kwh = kdu + 1

               nho = ( max(ntiny+1,dim)-kdu+1-4 ) - ( kdu+1 ) + 1

               kwv = kdu + 4

               nve = max(ntiny+1,dim) - kdu - kwv + 1

               CALL dlaset( 'All', max(ntiny+1,dim),

     $              max(ntiny+1,dim), zero, one, work(ipuu),

     $              max(ntiny+1,dim) )

*

*              Small-bulge multi-shift QR sweep.

*

               lks = max( 1, ns - win*lns + 1 )

               CALL dlaqr6( job, wantt, .true., lkacc22,

     $              max(ntiny+1,dim), llktop, llkbot, lns, sr( lks ),

     $              si( lks ), work(iph), max(ntiny+1,dim), llktop,

     $              llkbot, work(ipuu), max(ntiny+1,dim), work(ipu),

     $              3, work( iph+ku-1 ),

     $              max(ntiny+1,dim), nve, work( iph+kwv-1 ),

     $              max(ntiny+1,dim), nho, work( iph-1+ku+(kwh-1)*

     $              max(ntiny+1,dim) ), max(ntiny+1,dim) )

*

*              Copy submatrix of H back.

*

               CALL dlamov( 'Upper', dim, dim, work(iph),

     $              max(ntiny+1,dim), h(ii+(jj-1)*lldh), lldh )

               CALL dcopy( dim-1, work(iph+1), max(ntiny+1,dim)+1,

     $              h(ii+(jj-1)*lldh+1), lldh+1 )

               IF( lsame( job, 'I' ) .OR. lsame( job, 'C' ) ) THEN

                  CALL dcopy( dim-2, work(iph+2), dim+1,

     $                 h(ii+(jj-1)*lldh+2), lldh+1 )

                  CALL dcopy( dim-3, work(iph+3), dim+1,

     $                 h(ii+(jj-1)*lldh+3), lldh+1 )

               ELSE

                  CALL dlaset( 'Lower', dim-2, dim-2, zero,

     $                 zero, h(ii+(jj-1)*lldh+2), lldh )

               END IF

*

*              Copy actual submatrix of U to the correct place

*              of the buffer.

*

               CALL dlamov( 'All', lnwin, lnwin,

     $              work(ipuu+(max(ntiny+1,dim)*liroffh)+liroffh),

     $              max(ntiny+1,dim), work(ipu), lnwin )

            END IF

*

*           In case the local submatrix was smaller than

*           (NTINY+1)-by-(NTINY+1) we go here and proceed.

*

 45         CONTINUE

         ELSE

            iwork( 5+(win-1)*5 ) = 2

         END IF

*

*        Increment counter for buffers of orthogonal transformations.

*

         IF( myrow.EQ.lrsrc .OR. mycol.EQ.lcsrc ) THEN

            IF( idonejob.EQ.1 .AND. iwork(5+(win-1)*5).LT.2 ) THEN

               IF( myrow.EQ.lrsrc ) lenrbuf = lenrbuf + lnwin*lnwin

               IF( mycol.EQ.lcsrc ) lencbuf = lencbuf + lnwin*lnwin

            END IF

         END IF

 40   CONTINUE

*

*     Did some work in the above do-loop?

*

      CALL igsum2d( ictxt, 'All', '1-Tree', 1, 1, idonejob, 1, -1, -1 )

      donejob = idonejob.GT.0

*

*     Chased off bulges from first window?

*

      IF( nprocs.GT.1 )

     $   CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1, -1,

     $        -1, -1, -1, -1 )

*

*     If work was done in the do-loop over local windows, perform

*     updates, otherwise go for cross border bulge chasing and updates.

*

      IF( donejob ) THEN

*

*        Broadcast orthogonal transformations.

*

 49      CONTINUE

         IF( lenrbuf.GT.0 .OR. lencbuf.GT.0 ) THEN

            DO 50 dir = 1, 2

               bcdone = .false.

               DO 60 win = 1, anmwin

                  IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.

     $                 bcdone ) GO TO 62

                  lrsrc = iwork( 3+(win-1)*5 )

                  lcsrc = iwork( 4+(win-1)*5 )

                  IF( myrow.EQ.lrsrc .AND. mycol.EQ.lcsrc ) THEN

                     IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                    npcol.GT.1 ) THEN

                        CALL dgebs2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf )

                     ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                    nprow.GT.1 ) THEN

                        CALL dgebs2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work, lencbuf )

                     END IF

                     IF( lenrbuf.GT.0 )

     $                  CALL dlamov( 'All', lenrbuf, 1, work, lenrbuf,

     $                       work(1+lenrbuf), lencbuf )

                     bcdone = .true.

                  ELSEIF( myrow.EQ.lrsrc .AND. dir.EQ.1 ) THEN

                     IF( lenrbuf.GT.0 .AND. npcol.GT.1 ) THEN

                        CALL dgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf, lrsrc, lcsrc )

                        bcdone = .true.

                     END IF

                  ELSEIF( mycol.EQ.lcsrc .AND. dir.EQ.2 ) THEN

                     IF( lencbuf.GT.0 .AND. nprow.GT.1 ) THEN

                        CALL dgebr2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work(1+lenrbuf), lencbuf, lrsrc, lcsrc )

                        bcdone = .true.

                     END IF

                  END IF

 62               CONTINUE

 60            CONTINUE

 50         CONTINUE

         END IF

*

*        Compute updates - make sure to skip windows that were skipped

*        regarding local bulge chasing.

*

         DO 65 dir = 1, 2

            winid = 0

            IF( dir.EQ.1 ) THEN

               ipnext = 1

            ELSE

               ipnext = 1 + lenrbuf

            END IF

            DO 70 win = 1, anmwin

               IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 75

               lrsrc = iwork( 3+(win-1)*5 )

               lcsrc = iwork( 4+(win-1)*5 )

               lktop = iwork( 1+(win-1)*5 )

               lkbot = iwork( 2+(win-1)*5 )

               lnwin = lkbot - lktop + 1

               IF( (myrow.EQ.lrsrc.AND.lenrbuf.GT.0.AND.dir.EQ.1) .OR.

     $              (mycol.EQ.lcsrc.AND.lencbuf.GT.0.AND.dir.EQ.2 ) )

     $              THEN

*

*                 Set up workspaces.

*

                  ipu = ipnext

                  ipnext = ipu + lnwin*lnwin

                  ipw = 1 + lenrbuf + lencbuf

                  liroffh = mod(lktop-1,nb)

                  winid = winid + 1

*

*                 Recompute JOB to see if block structure of U could

*                 possibly be exploited or not.

*

                  IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN

                     job = 'All steps'

                  ELSEIF( lktop.EQ.ktop ) THEN

                     job = 'Introduce and chase'

                  ELSEIF( lkbot.EQ.kbot ) THEN

                     job = 'Off-chase bulges'

                  ELSE

                     job = 'Chase bulges'

                  END IF

               END IF

*

*              Use U to update far-from-diagonal entries in H.

*              If required, use U to update Z as well.

*

               IF( .NOT. blk22 .OR. .NOT. lsame(job,'C')

     $              .OR. lns.LE.2 ) THEN

*

                  IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                 mycol.EQ.lcsrc ) THEN

                     IF( wantt ) THEN

                        DO 80 indx = 1, lktop-liroffh-1, nb

                           CALL infog2l( indx, lktop, desch, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              lrows = min( nb, lktop-indx )

                              CALL dgemm('No transpose', 'No transpose',

     $                             lrows, lnwin, lnwin, one,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipu ), lnwin, zero,

     $                             work(ipw),

     $                             lrows )

                              CALL dlamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 80                     CONTINUE

                     END IF

                     IF( wantz ) THEN

                        DO 90 indx = 1, n, nb

                           CALL infog2l( indx, lktop, descz, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              lrows = min(nb,n-indx+1)

                              CALL dgemm( 'No transpose',

     $                             'No transpose', lrows, lnwin, lnwin,

     $                             one, z((jloc-1)*lldz+iloc), lldz,

     $                             work( ipu ), lnwin, zero,

     $                             work(ipw), lrows )

                              CALL dlamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             z((jloc-1)*lldz+iloc), lldz )

                           END IF

 90                     CONTINUE

                     END IF

                  END IF

*

*                 Update the rows of H affected by the bulge-chase.

*

                  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                 myrow.EQ.lrsrc ) THEN

                     IF( wantt ) THEN

                        IF( iceil(lkbot,nb).EQ.iceil(kbot,nb) ) THEN

                           lcols = min(iceil(kbot,nb)*nb,n) - kbot

                        ELSE

                           lcols = 0

                        END IF

                        IF( lcols.GT.0 ) THEN

                           indx = kbot + 1

                           CALL infog2l( lktop, indx, desch, nprow,

     $                          npcol, myrow, mycol, iloc, jloc,

     $                          rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              CALL dgemm( 'Transpose', 'No Transpose',

     $                             lnwin, lcols, lnwin, one, work(ipu),

     $                             lnwin, h((jloc-1)*lldh+iloc), lldh,

     $                             zero, work(ipw), lnwin )

                              CALL dlamov( 'All', lnwin, lcols,

     $                             work(ipw), lnwin,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

                        END IF

 93                     CONTINUE

                        indxs = iceil(lkbot,nb)*nb + 1

                        DO 95 indx = indxs, n, nb

                           CALL infog2l( lktop, indx,

     $                          desch, nprow, npcol, myrow, mycol,

     $                          iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              lcols = min( nb, n-indx+1 )

                              CALL dgemm( 'Transpose', 'No Transpose',

     $                             lnwin, lcols, lnwin, one, work(ipu),

     $                             lnwin, h((jloc-1)*lldh+iloc), lldh,

     $                             zero, work(ipw),

     $                             lnwin )

                              CALL dlamov( 'All', lnwin, lcols,

     $                             work(ipw), lnwin,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 95                     CONTINUE

                     END IF

                  END IF

               ELSE

                  ks = lnwin-lns/2*3

*

*                 The LNWIN-by-LNWIN matrix U containing the accumulated

*                 orthogonal transformations has the following structure:

*

*                     [ U11  U12 ]

*                 U = [          ],

*                     [ U21  U22 ]

*

*                 where U21 is KS-by-KS upper triangular and U12 is

*                 (LNWIN-KS)-by-(LNWIN-KS) lower triangular.

*                 Here, KS = LNWIN - 3*(LNS/2).

*

*                 Update the columns of H and Z affected by the bulge

*                 chasing.

*

*                 Compute H2*U21 + H1*U11 in workspace.

*

                  IF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                 mycol.EQ.lcsrc ) THEN

                     IF( wantt ) THEN

                        DO 100 indx = 1, lktop-liroffh-1, nb

                           CALL infog2l( indx, lktop, desch, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              jloc1 = indxg2l( lktop+lnwin-ks, nb,

     $                             mycol, desch( csrc_ ), npcol )

                              lrows = min( nb, lktop-indx )

                              CALL dlamov( 'All', lrows, ks,

     $                             h((jloc1-1)*lldh+iloc ), lldh,

     $                             work(ipw), lrows )

                              CALL dtrmm( 'Right', 'Upper',

     $                             'No transpose','Non-unit', lrows,

     $                             ks, one, work( ipu+lnwin-ks ), lnwin,

     $                             work(ipw), lrows )

                              CALL dgemm('No transpose', 'No transpose',

     $                             lrows, ks, lnwin-ks, one,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipu ), lnwin, one, work(ipw),

     $                             lrows )

*

*                             Compute H1*U12 + H2*U22 in workspace.

*

                              CALL dlamov( 'All', lrows, lnwin-ks,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipw+ks*lrows ), lrows )

                              CALL dtrmm( 'Right', 'Lower',

     $                             'No transpose', 'Non-Unit',

     $                             lrows, lnwin-ks, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw+ks*lrows ), lrows )

                              CALL dgemm('No transpose', 'No transpose',

     $                             lrows, lnwin-ks, ks, one,

     $                             h((jloc1-1)*lldh+iloc), lldh,

     $                             work( ipu+lnwin*ks+lnwin-ks ), lnwin,

     $                             one, work( ipw+ks*lrows ), lrows )

*

*                             Copy workspace to H.

*

                              CALL dlamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 100                    CONTINUE

                     END IF

*

                     IF( wantz ) THEN

*

*                       Compute Z2*U21 + Z1*U11 in workspace.

*

                        DO 110 indx = 1, n, nb

                           CALL infog2l( indx, lktop, descz, nprow,

     $                          npcol, myrow, mycol, iloc, jloc, rsrc1,

     $                          csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

                              jloc1 = indxg2l( lktop+lnwin-ks, nb,

     $                             mycol, descz( csrc_ ), npcol )

                              lrows = min(nb,n-indx+1)

                              CALL dlamov( 'All', lrows, ks,

     $                             z((jloc1-1)*lldz+iloc ), lldz,

     $                             work(ipw), lrows )

                              CALL dtrmm( 'Right', 'Upper',

     $                             'No transpose', 'Non-unit',

     $                             lrows, ks, one, work( ipu+lnwin-ks ),

     $                             lnwin, work(ipw), lrows )

                              CALL dgemm( 'No transpose',

     $                             'No transpose', lrows, ks, lnwin-ks,

     $                             one, z((jloc-1)*lldz+iloc), lldz,

     $                             work( ipu ), lnwin, one, work(ipw),

     $                             lrows )

*

*                             Compute Z1*U12 + Z2*U22 in workspace.

*

                              CALL dlamov( 'All', lrows, lnwin-ks,

     $                             z((jloc-1)*lldz+iloc), lldz,

     $                             work( ipw+ks*lrows ), lrows)

                              CALL dtrmm( 'Right', 'Lower',

     $                             'No transpose', 'Non-unit',

     $                             lrows, lnwin-ks, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw+ks*lrows ), lrows )

                              CALL dgemm( 'No transpose',

     $                             'No transpose', lrows, lnwin-ks, ks,

     $                             one, z((jloc1-1)*lldz+iloc), lldz,

     $                             work( ipu+lnwin*ks+lnwin-ks ), lnwin,

     $                             one, work( ipw+ks*lrows ),

     $                             lrows )

*

*                             Copy workspace to Z.

*

                              CALL dlamov( 'All', lrows, lnwin,

     $                             work(ipw), lrows,

     $                             z((jloc-1)*lldz+iloc), lldz )

                           END IF

 110                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                 myrow.EQ.lrsrc ) THEN

                     IF( wantt ) THEN

                        indxs = iceil(lkbot,nb)*nb + 1

                        DO 120 indx = indxs, n, nb

                           CALL infog2l( lktop, indx,

     $                          desch, nprow, npcol, myrow, mycol, iloc,

     $                          jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 ) THEN

*

*                             Compute U21**T*H2 + U11**T*H1 in workspace.

*

                              iloc1 = indxg2l( lktop+lnwin-ks, nb,

     $                             myrow, desch( rsrc_ ), nprow )

                              lcols = min( nb, n-indx+1 )

                              CALL dlamov( 'All', ks, lcols,

     $                             h((jloc-1)*lldh+iloc1), lldh,

     $                             work(ipw), lnwin )

                              CALL dtrmm( 'Left', 'Upper', 'Transpose',

     $                             'Non-unit', ks, lcols, one,

     $                             work( ipu+lnwin-ks ), lnwin,

     $                             work(ipw), lnwin )

                              CALL dgemm( 'Transpose', 'No transpose',

     $                             ks, lcols, lnwin-ks, one, work(ipu),

     $                             lnwin, h((jloc-1)*lldh+iloc), lldh,

     $                             one, work(ipw), lnwin )

*

*                             Compute U12**T*H1 + U22**T*H2 in workspace.

*

                              CALL dlamov( 'All', lnwin-ks, lcols,

     $                             h((jloc-1)*lldh+iloc), lldh,

     $                             work( ipw+ks ), lnwin )

                              CALL dtrmm( 'Left', 'Lower', 'Transpose',

     $                             'Non-unit', lnwin-ks, lcols, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw+ks ), lnwin )

                              CALL dgemm( 'Transpose', 'No Transpose',

     $                             lnwin-ks, lcols, ks, one,

     $                             work( ipu+lnwin*ks+lnwin-ks ), lnwin,

     $                             h((jloc-1)*lldh+iloc1), lldh,

     $                             one, work( ipw+ks ), lnwin )

*

*                             Copy workspace to H.

*

                              CALL dlamov( 'All', lnwin, lcols,

     $                             work(ipw), lnwin,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

 120                    CONTINUE

                     END IF

                  END IF

               END IF

*

*              Update position information about current window.

*

               IF( dir.EQ.2 ) THEN

                  IF( lkbot.EQ.kbot ) THEN

                     lktop = kbot+1

                     lkbot = kbot+1

                     iwork( 1+(win-1)*5 ) = lktop

                     iwork( 2+(win-1)*5 ) = lkbot

                     iwork( 5+(win-1)*5 ) = 2

                  ELSE

                     lktop = min( lktop + lnwin - lchain,

     $                    iceil( lktop, nb )*nb - lchain + 1,

     $                    kbot )

                     iwork( 1+(win-1)*5 ) = lktop

                     lkbot = min( lkbot + lnwin - lchain,

     $                    iceil( lkbot, nb )*nb, kbot )

                     iwork( 2+(win-1)*5 ) = lkbot

                     lnwin = lkbot-lktop+1

                     IF( lnwin.EQ.lchain ) iwork(5+(win-1)*5) = 2

                  END IF

               END IF

 75            CONTINUE

 70         CONTINUE

 65      CONTINUE

*

*        If bulges were chased off from the first window, the window is

*        removed.

*

         IF( ichoff.GT.0 ) THEN

            DO 128 win = 2, anmwin

               iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )

               iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )

               iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )

               iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )

               iwork( 5+(win-2)*5 ) = iwork( 5+(win-1)*5 )

 128        CONTINUE

            anmwin = anmwin - 1

            ipiw = 6+(anmwin-1)*5

         END IF

*

*        If we have no more windows, return.

*

         IF( anmwin.LT.1 ) RETURN

*

      ELSE

*

*        Set up windows such that as many bulges as possible can be

*        moved over the border to the next block. Make sure that the

*        cross border window is at least (NTINY+1)-by-(NTINY+1), unless

*        we are chasing off the bulges from the last window. This is

*        accomplished by setting the bottom index LKBOT such that the

*        local window has the correct size.

*

*        If LKBOT then becomes larger than KBOT, the endpoint of the whole

*        global submatrix, or LKTOP of a window already residing

*        at the other side of the border, this is taken care of by some

*        dirty tricks.

*

         DO 130 win = 1, anmwin

            lktop1 = iwork( 1+(win-1)*5 )

            lkbot = iwork( 2+(win-1)*5 )

            lnwin = max( 6, min( lkbot - lktop1 + 1, lchain ) )

            lkbot1 = max( min( kbot, iceil(lktop1,nb)*nb+lchain),

     $           min( kbot, min( lktop1+2*lnwin-1,

     $           (iceil(lktop1,nb)+1)*nb ) ) )

            iwork( 2+(win-1)*5 ) = lkbot1

 130     CONTINUE

         ichoff = 0

*

*        Keep a record of which windows were moved over the borders

*        such that we can delay some windows due to lack of space on the

*        other side of the border; we do not want to leave any of the

*        bulges behind...

*

*        IWORK( 5+(WIN-1)*5 ) = 0: window WIN has not been processed

*        IWORK( 5+(WIN-1)*5 ) = 1: window WIN is being processed (need to

*                                  know for updates)

*        IWORK( 5+(WIN-1)*5 ) = 2: window WIN has been fully processed

*

*        So, start by marking all windows as not processed.

*

         DO 135 win = 1, anmwin

            iwork( 5+(win-1)*5 ) = 0

 135     CONTINUE

*

*        Do the cross border bulge-chase as follows: Start from the

*        first window (the one that is closest to be chased off the

*        diagonal of H) and take the odd windows first followed by the

*        even ones. To not get into hang-problems on processor meshes

*        with at least one odd dimension, the windows will in such a case

*        be processed in chunks of {the minimum odd process dimension}-1

*        windows to avoid overlapping processor scopes in forming the

*        cross border computational windows and the cross border update

*        regions.

*

         wchunk = max( 1, min( anmwin, nprow-1, npcol-1 ) )

         numchunk = iceil( anmwin, wchunk )

*

*        Based on the computed chunk of windows, start working with

*        crossborder bulge-chasing. Repeat this as long as there is

*        still work left to do (137 is a kind of do-while statement).

*

 137     CONTINUE

*

*        Zero out LENRBUF and LENCBUF each time we restart this loop.

*

         lenrbuf = 0

         lencbuf = 0

*

         DO 140 oddeven = 1, min( 2, anmwin )

         DO 150 chunknum = 1, numchunk

            ipnext = 1

            DO 160 win = oddeven+(chunknum-1)*wchunk,

     $           min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

*

*              Get position and size of the WIN:th active window and

*              make sure that we skip the cross border bulge for this

*              window if the window is not shared between several data

*              layout blocks (and processors).

*

*              Also, delay windows that do not have sufficient size on

*              the other side of the border. Moreover, make sure to skip

*              windows that were already processed in the last round of

*              the do-while loop (137).

*

               IF( iwork( 5+(win-1)*5 ).EQ.2 ) GO TO 165

               lktop = iwork( 1+(win-1)*5 )

               lkbot = iwork( 2+(win-1)*5 )

               IF( win.GT.1 ) THEN

                  lktop2 = iwork( 1+(win-2)*5 )

               ELSE

                  lktop2 = kbot+1

               END IF

               IF( iceil(lktop,nb).EQ.iceil(lkbot,nb) .OR.

     $              lkbot.GE.lktop2 ) GO TO 165

               lnwin = lkbot - lktop + 1

               IF( lnwin.LE.ntiny .AND. lkbot.NE.kbot .AND.

     $              .NOT. mod(lkbot,nb).EQ.0  ) GO TO 165

*

*              If the window will be processed, mark it as in progress.

*

               iwork( 5+(win-1)*5 ) = 1

*

*              Extract processors for current cross border window,

*              as below:

*

*                        1 | 2

*                        --+--

*                        3 | 4

*

               rsrc1 = iwork( 3+(win-1)*5 )

               csrc1 = iwork( 4+(win-1)*5 )

               rsrc2 = rsrc1

               csrc2 = mod( csrc1+1, npcol )

               rsrc3 = mod( rsrc1+1, nprow )

               csrc3 = csrc1

               rsrc4 = mod( rsrc1+1, nprow )

               csrc4 = mod( csrc1+1, npcol )

*

*              Form group of four processors for cross border window.

*

               IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $              ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.

     $              ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.

     $              ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

*

*                 Compute the upper and lower parts of the active

*                 window.

*

                  dim1 = nb - mod(lktop-1,nb)

                  dim4 = lnwin - dim1

*

*                 Temporarily compute a new value of the size of the

*                 computational window that is larger than or equal to

*                 NTINY+1; call the *real* value DIM.

*

                  dim = lnwin

                  lnwin = max(ntiny+1,lnwin)

*

*                 Divide workspace.

*

                  ipu = ipnext

                  iph = ipu + dim**2

                  ipuu = iph + lnwin**2

                  ipv = ipuu + lnwin**2

                  ipnext = iph

                  IF( dim.LT.lnwin ) THEN

                     CALL dlaset( 'All', lnwin, lnwin, zero,

     $                    one, work( iph ), lnwin )

                  ELSE

                     CALL dlaset( 'All', dim, dim, zero,

     $                    zero, work( iph ), lnwin )

                  END IF

*

*                 Form the active window.

*

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     iloc = indxg2l( lktop, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL dlamov( 'All', dim1, dim1,

     $                    h((jloc-1)*lldh+iloc), lldh, work(iph),

     $                    lnwin )

                     IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN

*                       Proc#1 <==> Proc#4

                        CALL dgesd2d( ictxt, dim1, dim1,

     $                       work(iph), lnwin, rsrc4, csrc4 )

                        CALL dgerv2d( ictxt, dim4, dim4,

     $                       work(iph+dim1*lnwin+dim1),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     iloc = indxg2l( lktop+dim1, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL dlamov( 'All', dim4, dim4,

     $                    h((jloc-1)*lldh+iloc), lldh,

     $                    work(iph+dim1*lnwin+dim1),

     $                    lnwin )

                     IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 ) THEN

*                       Proc#4 <==> Proc#1

                        CALL dgesd2d( ictxt, dim4, dim4,

     $                       work(iph+dim1*lnwin+dim1),

     $                       lnwin, rsrc1, csrc1 )

                        CALL dgerv2d( ictxt, dim1, dim1,

     $                       work(iph), lnwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     iloc = indxg2l( lktop, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL dlamov( 'All', dim1, dim4,

     $                    h((jloc-1)*lldh+iloc), lldh,

     $                    work(iph+dim1*lnwin), lnwin )

                     IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 ) THEN

*                       Proc#2 ==> Proc#1

                        CALL dgesd2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN

*                       Proc#2 ==> Proc#4

                        CALL dgesd2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     iloc = indxg2l( lktop+dim1, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1-1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     CALL dlamov( 'All', 1, 1,

     $                    h((jloc-1)*lldh+iloc), lldh,

     $                    work(iph+(dim1-1)*lnwin+dim1),

     $                    lnwin )

                     IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN

*                       Proc#3 ==> Proc#1

                        CALL dgesd2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 ) THEN

*                       Proc#3 ==> Proc#4

                        CALL dgesd2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 ) THEN

*                       Proc#1 <== Proc#2

                        CALL dgerv2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc2, csrc2 )

                     END IF

                     IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN

*                       Proc#1 <== Proc#3

                        CALL dgerv2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc3, csrc3 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN

*                       Proc#4 <== Proc#2

                        CALL dgerv2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc2, csrc2 )

                     END IF

                     IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 ) THEN

*                       Proc#4 <== Proc#3

                        CALL dgerv2d( ictxt, 1, 1,

     $                       work(iph+(dim1-1)*lnwin+dim1),

     $                       lnwin, rsrc3, csrc3 )

                     END IF

                  END IF

*

*                 Prepare for call to DLAQR6 - it could happen that no

*                 bulges were introduced in the pre-cross border step

*                 since the chain was too long to fit in the top-left

*                 part of the cross border window. In such a case, the

*                 bulges are introduced here instead.  It could also

*                 happen that the bottom-right part is too small to hold

*                 the whole chain -- in such a case, the bulges are

*                 chased off immediately, as well.

*

                  IF( (myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1) .OR.

     $                 (myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4) ) THEN

                     IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot .AND.

     $                    (dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN

                        job = 'All steps'

                        ichoff = 1

                     ELSEIF( lktop.EQ.ktop .AND.

     $                    ( dim1.LE.lchain .OR. dim1.LE.ntiny ) ) THEN

                        job = 'Introduce and chase'

                     ELSEIF( lkbot.EQ.kbot ) THEN

                        job = 'Off-chase bulges'

                        ichoff = 1

                     ELSE

                        job = 'Chase bulges'

                     END IF

                     ku = lnwin - kdu + 1

                     kwh = kdu + 1

                     nho = ( lnwin-kdu+1-4 ) - ( kdu+1 ) + 1

                     kwv = kdu + 4

                     nve = lnwin - kdu - kwv + 1

                     CALL dlaset( 'All', lnwin, lnwin,

     $                    zero, one, work(ipuu), lnwin )

*

*                    Small-bulge multi-shift QR sweep.

*

                     lks = max(1, ns - win*lns + 1)

                     CALL dlaqr6( job, wantt, .true., lkacc22, lnwin,

     $                    1, dim, lns, sr( lks ), si( lks ),

     $                    work(iph), lnwin, 1, dim,

     $                    work(ipuu), lnwin, work(ipu), 3,

     $                    work( iph+ku-1 ), lnwin, nve,

     $                    work( iph+kwv-1 ), lnwin, nho,

     $                    work( iph-1+ku+(kwh-1)*lnwin ), lnwin )

*

*                    Copy local submatrices of H back to global matrix.

*

                     IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                        iloc = indxg2l( lktop, nb, myrow,

     $                       desch( rsrc_ ), nprow )

                        jloc = indxg2l( lktop, nb, mycol,

     $                       desch( csrc_ ), npcol )

                        CALL dlamov( 'All', dim1, dim1, work(iph),

     $                       lnwin, h((jloc-1)*lldh+iloc),

     $                       lldh )

                     END IF

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        iloc = indxg2l( lktop+dim1, nb, myrow,

     $                       desch( rsrc_ ), nprow )

                        jloc = indxg2l( lktop+dim1, nb, mycol,

     $                       desch( csrc_ ), npcol )

                        CALL dlamov( 'All', dim4, dim4,

     $                       work(iph+dim1*lnwin+dim1),

     $                       lnwin, h((jloc-1)*lldh+iloc), lldh )

                     END IF

*

*                    Copy actual submatrix of U to the correct place of

*                    the buffer.

*

                     CALL dlamov( 'All', dim, dim,

     $                    work(ipuu), lnwin, work(ipu), dim )

                  END IF

*

*                 Return data to process 2 and 3.

*

                  rws3 = min(3,dim4)

                  cls3 = min(3,dim1)

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN

*                       Proc#1 ==> Proc#3

                        CALL dgesd2d( ictxt, rws3, cls3,

     $                       work( iph+(dim1-cls3)*lnwin+dim1 ),

     $                       lnwin, rsrc3, csrc3 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN

*                       Proc#4 ==> Proc#2

                        CALL dgesd2d( ictxt, dim1, dim4,

     $                       work( iph+dim1*lnwin),

     $                       lnwin, rsrc2, csrc2 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     iloc = indxg2l( lktop, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN

*                       Proc#2 <== Proc#4

                        CALL dgerv2d( ictxt, dim1, dim4,

     $                       work(iph+dim1*lnwin),

     $                       lnwin, rsrc4, csrc4 )

                     END IF

                     CALL dlamov( 'All', dim1, dim4,

     $                    work( iph+dim1*lnwin ), lnwin,

     $                    h((jloc-1)*lldh+iloc), lldh )

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     iloc = indxg2l( lktop+dim1, nb, myrow,

     $                    desch( rsrc_ ), nprow )

                     jloc = indxg2l( lktop+dim1-cls3, nb, mycol,

     $                    desch( csrc_ ), npcol )

                     IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN

*                       Proc#3 <== Proc#1

                        CALL dgerv2d( ictxt, rws3, cls3,

     $                       work( iph+(dim1-cls3)*lnwin+dim1 ),

     $                       lnwin, rsrc1, csrc1 )

                     END IF

                     CALL dlamov( 'Upper', rws3, cls3,

     $                    work( iph+(dim1-cls3)*lnwin+dim1 ),

     $                    lnwin, h((jloc-1)*lldh+iloc),

     $                    lldh )

                     IF( rws3.GT.1 .AND. cls3.GT.1 ) THEN

                        elem = work( iph+(dim1-cls3)*lnwin+dim1+1 )

                        IF( elem.NE.zero ) THEN

                           CALL dlamov( 'Lower', rws3-1, cls3-1,

     $                          work( iph+(dim1-cls3)*lnwin+dim1+1 ),

     $                          lnwin, h((jloc-1)*lldh+iloc+1), lldh )

                        END IF

                     END IF

                  END IF

*

*                 Restore correct value of LNWIN.

*

                  lnwin = dim

*

               END IF

*

*              Increment counter for buffers of orthogonal

*              transformations.

*

               IF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 .OR.

     $              myrow.EQ.rsrc4 .OR. mycol.EQ.csrc4 ) THEN

                  IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 )

     $               lenrbuf = lenrbuf + lnwin*lnwin

                  IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 )

     $               lencbuf = lencbuf + lnwin*lnwin

               END IF

*

*              If no cross border bulge chasing was performed for the

*              current WIN:th window, the processor jumps to this point

*              and considers the next one.

*

 165           CONTINUE

*

 160        CONTINUE

*

*           Broadcast orthogonal transformations -- this will only happen

*           if the buffer associated with the orthogonal transformations

*           is not empty (controlled by LENRBUF, for row-wise

*           broadcasts, and LENCBUF, for column-wise broadcasts).

*

            DO 170 dir = 1, 2

               bcdone = .false.

               DO 180 win = oddeven+(chunknum-1)*wchunk,

     $              min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

                  IF( ( lenrbuf.EQ.0 .AND. lencbuf.EQ.0 ) .OR.

     $                 bcdone ) GO TO 185

                  rsrc1 = iwork( 3+(win-1)*5 )

                  csrc1 = iwork( 4+(win-1)*5 )

                  rsrc4 = mod( rsrc1+1, nprow )

                  csrc4 = mod( csrc1+1, npcol )

                  IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $                 ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

                     IF( dir.EQ.1 .AND. lenrbuf.GT.0 .AND.

     $                    npcol.GT.1 .AND. nprocs.GT.2 ) THEN

                        IF( myrow.EQ.rsrc1 .OR. ( myrow.EQ.rsrc4

     $                       .AND. rsrc4.NE.rsrc1 ) ) THEN

                           CALL dgebs2d( ictxt, 'Row', '1-Tree',

     $                          lenrbuf, 1, work, lenrbuf )

                        ELSE

                           CALL dgebr2d( ictxt, 'Row', '1-Tree',

     $                          lenrbuf, 1, work, lenrbuf, rsrc1,

     $                          csrc1 )

                        END IF

                     ELSEIF( dir.EQ.2 .AND. lencbuf.GT.0 .AND.

     $                       nprow.GT.1 .AND. nprocs.GT.2 ) THEN

                        IF( mycol.EQ.csrc1 .OR. ( mycol.EQ.csrc4

     $                       .AND. csrc4.NE.csrc1 ) ) THEN

                           CALL dgebs2d( ictxt, 'Col', '1-Tree',

     $                          lencbuf, 1, work, lencbuf )

                        ELSE

                           CALL dgebr2d( ictxt, 'Col', '1-Tree',

     $                          lencbuf, 1, work(1+lenrbuf), lencbuf,

     $                          rsrc1, csrc1 )

                        END IF

                     END IF

                     IF( lenrbuf.GT.0 .AND. ( mycol.EQ.csrc1 .OR.

     $                    ( mycol.EQ.csrc4 .AND. csrc4.NE.csrc1 ) ) )

     $                  CALL dlamov( 'All', lenrbuf, 1, work, lenrbuf,

     $                       work(1+lenrbuf), lencbuf )

                     bcdone = .true.

                  ELSEIF( myrow.EQ.rsrc1 .AND. dir.EQ.1 ) THEN

                     IF( lenrbuf.GT.0 .AND. npcol.GT.1 )

     $                  CALL dgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf, rsrc1, csrc1 )

                     bcdone = .true.

                  ELSEIF( mycol.EQ.csrc1 .AND. dir.EQ.2 ) THEN

                     IF( lencbuf.GT.0 .AND. nprow.GT.1 )

     $                  CALL dgebr2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work(1+lenrbuf), lencbuf, rsrc1, csrc1 )

                     bcdone = .true.

                  ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 ) THEN

                     IF( lenrbuf.GT.0 .AND. npcol.GT.1 )

     $                  CALL dgebr2d( ictxt, 'Row', '1-Tree', lenrbuf,

     $                       1, work, lenrbuf, rsrc4, csrc4 )

                     bcdone = .true.

                  ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 ) THEN

                     IF( lencbuf.GT.0 .AND. nprow.GT.1 )

     $                  CALL dgebr2d( ictxt, 'Col', '1-Tree', lencbuf,

     $                       1, work(1+lenrbuf), lencbuf, rsrc4, csrc4 )

                     bcdone = .true.

                  END IF

 185              CONTINUE

 180           CONTINUE

 170        CONTINUE

*

*           Prepare for computing cross border updates by exchanging

*           data in cross border update regions in H and Z.

*

            DO 190 dir = 1, 2

               winid = 0

               ipw3 = 1

               DO 200 win = oddeven+(chunknum-1)*wchunk,

     $              min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

                  IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 205

*

*                 Make sure this part of the code is only executed when

*                 there has been some work performed on the WIN:th

*                 window.

*

                  lktop = iwork( 1+(win-1)*5 )

                  lkbot = iwork( 2+(win-1)*5 )

*

*                 Extract processor indices associated with

*                 the current window.

*

                  rsrc1 = iwork( 3+(win-1)*5 )

                  csrc1 = iwork( 4+(win-1)*5 )

                  rsrc4 = mod( rsrc1+1, nprow )

                  csrc4 = mod( csrc1+1, npcol )

*

*                 Compute local number of rows and columns

*                 of H and Z to exchange.

*

                  IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)

     $                 .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.

     $                 dir.EQ.1)) THEN

                     winid = winid + 1

                     lnwin = lkbot - lktop + 1

                     ipu = ipnext

                     dim1 = nb - mod(lktop-1,nb)

                     dim4 = lnwin - dim1

                     ipnext = ipu + lnwin*lnwin

                     IF( dir.EQ.2 ) THEN

                        IF( wantz ) THEN

                           zrows = numroc( n, nb, myrow, descz( rsrc_ ),

     $                          nprow )

                        ELSE

                           zrows = 0

                        END IF

                        IF( wantt ) THEN

                           hrows = numroc( lktop-1, nb, myrow,

     $                          desch( rsrc_ ), nprow )

                        ELSE

                           hrows = 0

                        END IF

                     ELSE

                        zrows = 0

                        hrows = 0

                     END IF

                     IF( dir.EQ.1 ) THEN

                        IF( wantt ) THEN

                           hcols = numroc( n - (lktop+dim1-1), nb,

     $                          mycol, csrc4, npcol )

                           IF( mycol.EQ.csrc4 ) hcols = hcols - dim4

                        ELSE

                           hcols = 0

                        END IF

                     ELSE

                        hcols = 0

                     END IF

                     ipw = max( 1 + lenrbuf + lencbuf, ipw3 )

                     ipw1 = ipw + hrows * lnwin

                     IF( wantz ) THEN

                        ipw2 = ipw1 + lnwin * hcols

                        ipw3 = ipw2 + zrows * lnwin

                     ELSE

                        ipw3 = ipw1 + lnwin * hcols

                     END IF

                  END IF

*

*                 Let each process row and column involved in the updates

*                 exchange data in H and Z with their neighbours.

*

                  IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN

                     IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN

                        DO 210 indx = 1, nprow

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, lktop, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc1, rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dlamov( 'All', hrows, dim1,

     $                                h((jloc1-1)*lldh+iloc), lldh,

     $                                work(ipw), hrows )

                                 IF( npcol.GT.1 ) THEN

                                    east = mod( mycol + 1, npcol )

                                    CALL dgesd2d( ictxt, hrows, dim1,

     $                                   work(ipw), hrows, rsrc, east )

                                    CALL dgerv2d( ictxt, hrows, dim4,

     $                                   work(ipw+hrows*dim1), hrows,

     $                                   rsrc, east )

                                 END IF

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, lktop+dim1,

     $                             desch, nprow, npcol, myrow, mycol,

     $                             iloc, jloc4, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dlamov( 'All', hrows, dim4,

     $                                h((jloc4-1)*lldh+iloc), lldh,

     $                                work(ipw+hrows*dim1), hrows )

                                 IF( npcol.GT.1 ) THEN

                                    west = mod( mycol - 1 + npcol,

     $                                   npcol )

                                    CALL dgesd2d( ictxt, hrows, dim4,

     $                                   work(ipw+hrows*dim1), hrows,

     $                                   rsrc, west )

                                    CALL dgerv2d( ictxt, hrows, dim1,

     $                                   work(ipw), hrows, rsrc, west )

                                 END IF

                              END IF

                           END IF

 210                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN

                     IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 ) THEN

                        DO 220 indx = 1, npcol

                           IF( myrow.EQ.rsrc1 ) THEN

                              IF( indx.EQ.1 ) THEN

                                 IF( lkbot.LT.n ) THEN

                                    CALL infog2l( lktop, lkbot+1, desch,

     $                                   nprow, npcol, myrow, mycol,

     $                                   iloc1, jloc, rsrc1, csrc )

                                 ELSE

                                    csrc = -1

                                 END IF

                              ELSEIF( mod(lkbot,nb).NE.0 ) THEN

                                 CALL infog2l( lktop,

     $                                (iceil(lkbot,nb)+(indx-2))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc1, jloc, rsrc1, csrc )

                              ELSE

                                 CALL infog2l( lktop,

     $                                (iceil(lkbot,nb)+(indx-1))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc1, jloc, rsrc1, csrc )

                              END IF

                              IF( mycol.EQ.csrc ) THEN

                                 CALL dlamov( 'All', dim1, hcols,

     $                                h((jloc-1)*lldh+iloc1), lldh,

     $                                work(ipw1), lnwin )

                                 IF( nprow.GT.1 ) THEN

                                    south = mod( myrow + 1, nprow )

                                    CALL dgesd2d( ictxt, dim1, hcols,

     $                                   work(ipw1), lnwin, south,

     $                                   csrc )

                                    CALL dgerv2d( ictxt, dim4, hcols,

     $                                   work(ipw1+dim1), lnwin, south,

     $                                   csrc )

                                 END IF

                              END IF

                           END IF

                           IF( myrow.EQ.rsrc4 ) THEN

                              IF( indx.EQ.1 ) THEN

                                 IF( lkbot.LT.n ) THEN

                                    CALL infog2l( lktop+dim1, lkbot+1,

     $                                   desch, nprow, npcol, myrow,

     $                                   mycol, iloc4, jloc, rsrc4,

     $                                   csrc )

                                 ELSE

                                    csrc = -1

                                 END IF

                              ELSEIF( mod(lkbot,nb).NE.0 ) THEN

                                 CALL infog2l( lktop+dim1,

     $                                (iceil(lkbot,nb)+(indx-2))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc4, jloc, rsrc4, csrc )

                              ELSE

                                 CALL infog2l( lktop+dim1,

     $                                (iceil(lkbot,nb)+(indx-1))*nb+1,

     $                                desch, nprow, npcol, myrow, mycol,

     $                                iloc4, jloc, rsrc4, csrc )

                              END IF

                              IF( mycol.EQ.csrc ) THEN

                                 CALL dlamov( 'All', dim4, hcols,

     $                                h((jloc-1)*lldh+iloc4), lldh,

     $                                work(ipw1+dim1), lnwin )

                                 IF( nprow.GT.1 ) THEN

                                    north = mod( myrow - 1 + nprow,

     $                                   nprow )

                                    CALL dgesd2d( ictxt, dim4, hcols,

     $                                   work(ipw1+dim1), lnwin, north,

     $                                   csrc )

                                    CALL dgerv2d( ictxt, dim1, hcols,

     $                                   work(ipw1), lnwin, north,

     $                                   csrc )

                                 END IF

                              END IF

                           END IF

 220                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0) THEN

                     IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN

                        DO 230 indx = 1, nprow

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, lktop,

     $                             descz, nprow, npcol, myrow, mycol,

     $                             iloc, jloc1, rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dlamov( 'All', zrows, dim1,

     $                                z((jloc1-1)*lldz+iloc), lldz,

     $                                work(ipw2), zrows )

                                 IF( npcol.GT.1 ) THEN

                                    east = mod( mycol + 1, npcol )

                                    CALL dgesd2d( ictxt, zrows, dim1,

     $                                   work(ipw2), zrows, rsrc,

     $                                   east )

                                    CALL dgerv2d( ictxt, zrows, dim4,

     $                                   work(ipw2+zrows*dim1),

     $                                   zrows, rsrc, east )

                                 END IF

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( 1+(indx-1)*nb,

     $                             lktop+dim1, descz, nprow, npcol,

     $                             myrow, mycol, iloc, jloc4, rsrc,

     $                             csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dlamov( 'All', zrows, dim4,

     $                                z((jloc4-1)*lldz+iloc), lldz,

     $                                work(ipw2+zrows*dim1), zrows )

                                 IF( npcol.GT.1 ) THEN

                                    west = mod( mycol - 1 + npcol,

     $                                   npcol )

                                    CALL dgesd2d( ictxt, zrows, dim4,

     $                                   work(ipw2+zrows*dim1),

     $                                   zrows, rsrc, west )

                                    CALL dgerv2d( ictxt, zrows, dim1,

     $                                   work(ipw2), zrows, rsrc,

     $                                   west )

                                 END IF

                              END IF

                           END IF

 230                    CONTINUE

                     END IF

                  END IF

*

*                 If no exchanges were performed for the current window,

*                 all processors jump to this point and try the next

*                 one.

*

 205              CONTINUE

*

 200           CONTINUE

*

*              Compute crossborder bulge-chase updates.

*

               winid = 0

               IF( dir.EQ.1 ) THEN

                  ipnext = 1

               ELSE

                  ipnext = 1 + lenrbuf

               END IF

               ipw3 = 1

               DO 240 win = oddeven+(chunknum-1)*wchunk,

     $              min(anmwin,max(1,oddeven+(chunknum)*wchunk-1)), 2

                  IF( iwork( 5+(win-1)*5 ).NE.1 ) GO TO 245

*

*                 Only perform this part of the code if there was really

*                 some work performed on the WIN:th window.

*

                  lktop = iwork( 1+(win-1)*5 )

                  lkbot = iwork( 2+(win-1)*5 )

                  lnwin = lkbot - lktop + 1

*

*                 Extract the processor indices associated with

*                 the current window.

*

                  rsrc1 = iwork( 3+(win-1)*5 )

                  csrc1 = iwork( 4+(win-1)*5 )

                  rsrc4 = mod( rsrc1+1, nprow )

                  csrc4 = mod( csrc1+1, npcol )

*

                  IF(((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)

     $                 .OR.((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.

     $                 dir.EQ.1)) THEN

*

*                    Set up workspaces.

*

                     winid = winid + 1

                     lktop = iwork( 1+(win-1)*5 )

                     lkbot = iwork( 2+(win-1)*5 )

                     lnwin = lkbot - lktop + 1

                     dim1 = nb - mod(lktop-1,nb)

                     dim4 = lnwin - dim1

                     ipu = ipnext + (winid-1)*lnwin*lnwin

                     IF( dir.EQ.2 ) THEN

                        IF( wantz ) THEN

                           zrows = numroc( n, nb, myrow, descz( rsrc_ ),

     $                          nprow )

                        ELSE

                           zrows = 0

                        END IF

                        IF( wantt ) THEN

                           hrows = numroc( lktop-1, nb, myrow,

     $                          desch( rsrc_ ), nprow )

                        ELSE

                           hrows = 0

                        END IF

                     ELSE

                        zrows = 0

                        hrows = 0

                     END IF

                     IF( dir.EQ.1 ) THEN

                        IF( wantt ) THEN

                           hcols = numroc( n - (lktop+dim1-1), nb,

     $                          mycol, csrc4, npcol )

                           IF( mycol.EQ.csrc4 ) hcols = hcols - dim4

                        ELSE

                           hcols = 0

                        END IF

                     ELSE

                        hcols = 0

                     END IF

*

*                    IPW  = local copy of overlapping column block of H

*                    IPW1 = local copy of overlapping row block of H

*                    IPW2 = local copy of overlapping column block of Z

*                    IPW3 = workspace for right hand side of matrix

*                           multiplication

*

                     ipw = max( 1 + lenrbuf + lencbuf, ipw3 )

                     ipw1 = ipw + hrows * lnwin

                     IF( wantz ) THEN

                        ipw2 = ipw1 + lnwin * hcols

                        ipw3 = ipw2 + zrows * lnwin

                     ELSE

                        ipw3 = ipw1 + lnwin * hcols

                     END IF

*

*                    Recompute job to see if special structure of U

*                    could possibly be exploited.

*

                     IF( lktop.EQ.ktop .AND. lkbot.EQ.kbot ) THEN

                        job = 'All steps'

                     ELSEIF( lktop.EQ.ktop .AND.

     $                    ( dim1.LT.lchain+1 .OR. dim1.LE.ntiny ) )

     $                    THEN

                        job = 'Introduce and chase'

                     ELSEIF( lkbot.EQ.kbot ) THEN

                        job = 'Off-chase bulges'

                     ELSE

                        job = 'Chase bulges'

                     END IF

                  END IF

*

*                 Test whether to exploit the sparsity structure of the

*                 orthogonal matrix U.

*

                  ks = dim1+dim4-lns/2*3

                  IF( .NOT. blk22 .OR. dim1.NE.ks .OR.

     $                 dim4.NE.ks .OR. lsame(job,'I') .OR.

     $                 lsame(job,'O') .OR. lns.LE.2 ) THEN

*

*                    Update the columns of H and Z.

*

                     IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN

                        DO 250 indx = 1, min(lktop-1,1+(nprow-1)*nb), nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, lktop, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dgemm( 'No transpose',

     $                                'No transpose', hrows, dim1,

     $                                lnwin, one, work( ipw ), hrows,

     $                                work( ipu ), lnwin, zero,

     $                                work(ipw3), hrows )

                                 CALL dlamov( 'All', hrows, dim1,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, lktop+dim1, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dgemm( 'No transpose',

     $                                'No transpose', hrows, dim4,

     $                                lnwin, one, work( ipw ), hrows,

     $                                work( ipu+lnwin*dim1 ), lnwin,

     $                                zero, work(ipw3), hrows )

                                 CALL dlamov( 'All', hrows, dim4,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

 250                    CONTINUE

                     END IF

*

                     IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN

                        DO 260 indx = 1, min(n,1+(nprow-1)*nb), nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, lktop, descz, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dgemm( 'No transpose',

     $                                'No transpose', zrows, dim1,

     $                                lnwin, one, work( ipw2 ),

     $                                zrows, work( ipu ), lnwin,

     $                                zero, work(ipw3), zrows )

                                 CALL dlamov( 'All', zrows, dim1,

     $                                work(ipw3), zrows,

     $                                z((jloc-1)*lldz+iloc), lldz )

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, lktop+dim1, descz,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dgemm( 'No transpose',

     $                                'No transpose', zrows, dim4,

     $                                lnwin, one, work( ipw2 ),

     $                                zrows,

     $                                work( ipu+lnwin*dim1 ), lnwin,

     $                                zero, work(ipw3), zrows )

                                 CALL dlamov( 'All', zrows, dim4,

     $                                work(ipw3), zrows,

     $                                z((jloc-1)*lldz+iloc), lldz )

                              END IF

                           END IF

 260                    CONTINUE

                     END IF

*

*                    Update the rows of H.

*

                     IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0 ) THEN

                        IF( lkbot.LT.n ) THEN

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4 .AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop, indx, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc4 )

                              CALL dgemm( 'Transpose', 'No Transpose',

     $                             dim1, hcols, lnwin, one, work(ipu),

     $                             lnwin, work( ipw1 ), lnwin, zero,

     $                             work(ipw3), dim1 )

                              CALL dlamov( 'All', dim1, hcols,

     $                             work(ipw3), dim1,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

                           IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4 .AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop+dim1, indx, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc4, csrc4 )

                              CALL dgemm( 'Transpose', 'No Transpose',

     $                             dim4, hcols, lnwin, one,

     $                             work( ipu+dim1*lnwin ), lnwin,

     $                             work( ipw1), lnwin, zero,

     $                             work(ipw3), dim4 )

                              CALL dlamov( 'All', dim4, hcols,

     $                             work(ipw3), dim4,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

                           indxs = iceil(lkbot,nb)*nb + 1

                           IF( mod(lkbot,nb).NE.0 ) THEN

                              indxe = min(n,indxs+(npcol-2)*nb)

                           ELSE

                              indxe = min(n,indxs+(npcol-1)*nb)

                           END IF

                           DO 270 indx = indxs, indxe, nb

                              IF( myrow.EQ.rsrc1 ) THEN

                                 CALL infog2l( lktop, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc1, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL dgemm( 'Transpose',

     $                                   'No Transpose', dim1, hcols,

     $                                   lnwin, one, work( ipu ), lnwin,

     $                                   work( ipw1 ), lnwin, zero,

     $                                   work(ipw3), dim1 )

                                    CALL dlamov( 'All', dim1, hcols,

     $                                   work(ipw3), dim1,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

                              IF( myrow.EQ.rsrc4 ) THEN

                                 CALL infog2l( lktop+dim1, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc4, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL dgemm( 'Transpose',

     $                                   'No Transpose', dim4, hcols,

     $                                   lnwin, one,

     $                                   work( ipu+lnwin*dim1 ), lnwin,

     $                                   work( ipw1 ), lnwin,

     $                                   zero, work(ipw3), dim4 )

                                    CALL dlamov( 'All', dim4, hcols,

     $                                   work(ipw3), dim4,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

 270                       CONTINUE

                        END IF

                     END IF

                  ELSE

*

*                    Update the columns of H and Z.

*

*                    Compute H2*U21 + H1*U11 on the left side of the border.

*

                     IF( dir.EQ.2 .AND. wantt .AND. lencbuf.GT.0 ) THEN

                        indxe = min(lktop-1,1+(nprow-1)*nb)

                        DO 280 indx = 1, indxe, nb

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( indx, lktop, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dlamov( 'All', hrows, ks,

     $                                work( ipw+hrows*dim4), hrows,

     $                                work(ipw3), hrows )

                                 CALL dtrmm( 'Right', 'Upper',

     $                                'No transpose',

     $                                'Non-unit', hrows, ks, one,

     $                                work( ipu+dim4 ), lnwin,

     $                                work(ipw3), hrows )

                                 CALL dgemm( 'No transpose',

     $                                'No transpose', hrows, ks, dim4,

     $                                one, work( ipw ), hrows,

     $                                work( ipu ), lnwin, one,

     $                                work(ipw3), hrows )

                                 CALL dlamov( 'All', hrows, ks,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

*

*                          Compute H1*U12 + H2*U22 on the right side of

*                          the border.

*

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( indx, lktop+dim1, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL dlamov( 'All', hrows, dim4,

     $                                work(ipw), hrows, work( ipw3 ),

     $                                hrows )

                                 CALL dtrmm( 'Right', 'Lower',

     $                                'No transpose',

     $                                'Non-unit', hrows, dim4, one,

     $                                work( ipu+lnwin*ks ), lnwin,

     $                                work( ipw3 ), hrows )

                                 CALL dgemm( 'No transpose',

     $                                'No transpose', hrows, dim4, ks,

     $                                one, work( ipw+hrows*dim4),

     $                                hrows,

     $                                work( ipu+lnwin*ks+dim4 ), lnwin,

     $                                one, work( ipw3 ), hrows )

                                 CALL dlamov( 'All', hrows, dim4,

     $                                work(ipw3), hrows,

     $                                h((jloc-1)*lldh+iloc), lldh )

                              END IF

                           END IF

 280                    CONTINUE

                     END IF

*

                     IF( dir.EQ.2 .AND. wantz .AND. lencbuf.GT.0 ) THEN
*
*                       Structured update of the two column blocks
*                       of Z that straddle the border.  This branch
*                       is only reached when DIM1 = DIM4 = KS.
*                       WORK(IPW2) holds the local copy of those
*                       columns of Z, WORK(IPU) holds U with leading
*                       dimension LNWIN, and WORK(IPW3) is scratch
*                       for the product before it is stored in Z.
*
*                       The target columns are located from LKTOP
*                       and LKTOP+DIM1, i.e. the same global columns
*                       that were used when Z was copied into
*                       WORK(IPW2) and that the unstructured update
*                       of Z uses.
*
*                       Compute Z2*U21 + Z1*U11 on the left side
*                       of border.
*
                        indxe = min(n,1+(nprow-1)*nb)
                        DO 290 indx = 1, indxe, nb
                           IF( mycol.EQ.csrc1 ) THEN
                              CALL infog2l( indx, lktop, descz, nprow,
     $                             npcol, myrow, mycol, iloc, jloc,
     $                             rsrc, csrc1 )
                              IF( myrow.EQ.rsrc ) THEN
*
*                                Triangular part first (DTRMM on a
*                                copy), then add the dense part.
*
                                 CALL dlamov( 'All', zrows, ks,
     $                                work( ipw2+zrows*dim4),
     $                                zrows, work(ipw3), zrows )
                                 CALL dtrmm( 'Right', 'Upper',
     $                                'No transpose',
     $                                'Non-unit', zrows, ks, one,
     $                                work( ipu+dim4 ), lnwin,
     $                                work(ipw3), zrows )
                                 CALL dgemm( 'No transpose',
     $                                'No transpose', zrows, ks,
     $                                dim4, one, work( ipw2 ),
     $                                zrows, work( ipu ), lnwin,
     $                                one, work(ipw3), zrows )
                                 CALL dlamov( 'All', zrows, ks,
     $                                work(ipw3), zrows,
     $                                z((jloc-1)*lldz+iloc), lldz )
                              END IF
                           END IF
*
*                          Compute Z1*U12 + Z2*U22 on the right side
*                          of border.
*
                           IF( mycol.EQ.csrc4 ) THEN
                              CALL infog2l( indx, lktop+dim1, descz,
     $                             nprow, npcol, myrow, mycol, iloc,
     $                             jloc, rsrc, csrc4 )
                              IF( myrow.EQ.rsrc ) THEN
*
*                                Triangular part first (DTRMM on a
*                                copy), then add the dense part.
*
                                 CALL dlamov( 'All', zrows, dim4,
     $                                work(ipw2), zrows,
     $                                work( ipw3 ), zrows )
                                 CALL dtrmm( 'Right', 'Lower',
     $                                'No transpose',
     $                                'Non-unit', zrows, dim4,
     $                                one, work( ipu+lnwin*ks ),
     $                                lnwin, work( ipw3 ), zrows )
                                 CALL dgemm( 'No transpose',
     $                                'No transpose', zrows, dim4,
     $                                ks, one,
     $                                work( ipw2+zrows*(dim4)),
     $                                zrows,
     $                                work( ipu+lnwin*ks+dim4 ),
     $                                lnwin, one, work( ipw3 ),
     $                                zrows )
                                 CALL dlamov( 'All', zrows, dim4,
     $                                work(ipw3), zrows,
     $                                z((jloc-1)*lldz+iloc), lldz )
                              END IF
                           END IF
 290                    CONTINUE
                     END IF

*

                     IF( dir.EQ.1 .AND. wantt .AND. lenrbuf.GT.0) THEN

                        IF ( lkbot.LT.n ) THEN

*

*                          Compute U21**T*H2 + U11**T*H1 on the upper

*                          side of the border.

*

                           IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4.AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop, indx, desch, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc4 )

                              CALL dlamov( 'All', ks, hcols,

     $                             work( ipw1+dim4 ), lnwin,

     $                             work(ipw3), ks )

                              CALL dtrmm( 'Left', 'Upper', 'Transpose',

     $                             'Non-unit', ks, hcols, one,

     $                             work( ipu+dim4 ), lnwin,

     $                             work(ipw3), ks )

                              CALL dgemm( 'Transpose', 'No transpose',

     $                             ks, hcols, dim4, one, work(ipu),

     $                             lnwin, work(ipw1), lnwin,

     $                             one, work(ipw3), ks )

                              CALL dlamov( 'All', ks, hcols,

     $                             work(ipw3), ks,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

*

*                          Compute U12**T*H1 + U22**T*H2 on the lower

*                          side of the border.

*

                           IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4.AND.

     $                          mod(lkbot,nb).NE.0 ) THEN

                              indx = lkbot + 1

                              CALL infog2l( lktop+dim1, indx, desch,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc, rsrc4, csrc4 )

                              CALL dlamov( 'All', dim4, hcols,

     $                             work( ipw1 ), lnwin,

     $                             work( ipw3 ), dim4 )

                              CALL dtrmm( 'Left', 'Lower', 'Transpose',

     $                             'Non-unit', dim4, hcols, one,

     $                             work( ipu+lnwin*ks ), lnwin,

     $                             work( ipw3 ), dim4 )

                              CALL dgemm( 'Transpose', 'No Transpose',

     $                             dim4, hcols, ks, one,

     $                             work( ipu+lnwin*ks+dim4 ), lnwin,

     $                             work( ipw1+dim1 ), lnwin,

     $                             one, work( ipw3), dim4 )

                              CALL dlamov( 'All', dim4, hcols,

     $                             work(ipw3), dim4,

     $                             h((jloc-1)*lldh+iloc), lldh )

                           END IF

*

*                          Compute U21**T*H2 + U11**T*H1 on the upper

*                          side of the border.

*

                           indxs = iceil(lkbot,nb)*nb+1

                           IF( mod(lkbot,nb).NE.0 ) THEN

                              indxe = min(n,indxs+(npcol-2)*nb)

                           ELSE

                              indxe = min(n,indxs+(npcol-1)*nb)

                           END IF

                           DO 300 indx = indxs, indxe, nb

                              IF( myrow.EQ.rsrc1 ) THEN

                                 CALL infog2l( lktop, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc1, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL dlamov( 'All', ks, hcols,

     $                                   work( ipw1+dim4 ), lnwin,

     $                                   work(ipw3), ks )

                                    CALL dtrmm( 'Left', 'Upper',

     $                                   'Transpose', 'Non-unit',

     $                                   ks, hcols, one,

     $                                   work( ipu+dim4 ), lnwin,

     $                                   work(ipw3), ks )

                                    CALL dgemm( 'Transpose',

     $                                   'No transpose', ks, hcols,

     $                                   dim4, one, work(ipu), lnwin,

     $                                   work(ipw1), lnwin, one,

     $                                   work(ipw3), ks )

                                    CALL dlamov( 'All', ks, hcols,

     $                                   work(ipw3), ks,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

*

*                             Compute U12**T*H1 + U22**T*H2 on lower

*                             side of border.

*

                              IF( myrow.EQ.rsrc4 ) THEN

                                 CALL infog2l( lktop+dim1, indx, desch,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc4, csrc )

                                 IF( mycol.EQ.csrc ) THEN

                                    CALL dlamov( 'All', dim4, hcols,

     $                                   work( ipw1 ), lnwin,

     $                                   work( ipw3 ), dim4 )

                                    CALL dtrmm( 'Left', 'Lower',

     $                                   'Transpose','Non-unit',

     $                                   dim4, hcols, one,

     $                                   work( ipu+lnwin*ks ), lnwin,

     $                                   work( ipw3 ), dim4 )

                                    CALL dgemm( 'Transpose',

     $                                   'No Transpose', dim4, hcols,

     $                                   ks, one,

     $                                   work( ipu+lnwin*ks+dim4 ),

     $                                   lnwin, work( ipw1+dim1 ),

     $                                   lnwin, one, work( ipw3),

     $                                   dim4 )

                                    CALL dlamov( 'All', dim4, hcols,

     $                                   work(ipw3), dim4,

     $                                   h((jloc-1)*lldh+iloc), lldh )

                                 END IF

                              END IF

 300                       CONTINUE

                        END IF

                     END IF

                  END IF

*

*                 Update window information - mark processed windows as

*                 completed.

*

                  IF( dir.EQ.2 ) THEN

                     IF( lkbot.EQ.kbot ) THEN

                        lktop = kbot+1

                        lkbot = kbot+1

                        iwork( 1+(win-1)*5 ) = lktop

                        iwork( 2+(win-1)*5 ) = lkbot

                     ELSE

                        lktop = min( lktop + lnwin - lchain,

     $                       min( kbot, iceil( lkbot, nb )*nb ) -

     $                       lchain + 1 )

                        iwork( 1+(win-1)*5 ) = lktop

                        lkbot = min( max( lkbot + lnwin - lchain,

     $                       lktop + nwin - 1), min( kbot,

     $                       iceil( lkbot, nb )*nb ) )

                        iwork( 2+(win-1)*5 ) = lkbot

                     END IF

                     IF( iwork( 5+(win-1)*5 ).EQ.1 )

     $                    iwork( 5+(win-1)*5 ) = 2

                     iwork( 3+(win-1)*5 ) = rsrc4

                     iwork( 4+(win-1)*5 ) = csrc4

                  END IF

*

*                 If nothing was done for the WIN:th window, all

*                 processors come here and consider the next one

*                 instead.

*

 245              CONTINUE

 240           CONTINUE

 190        CONTINUE

 150     CONTINUE

 140     CONTINUE

*

*        Chased off bulges from first window?

*

         IF( nprocs.GT.1 )

     $      CALL igamx2d( ictxt, 'All', '1-Tree', 1, 1, ichoff, 1,

     $           -1, -1, -1, -1, -1 )

*

*        If the bulge was chased off from first window it is removed.

*

         IF( ichoff.GT.0 ) THEN

            DO 198 win = 2, anmwin

               iwork( 1+(win-2)*5 ) = iwork( 1+(win-1)*5 )

               iwork( 2+(win-2)*5 ) = iwork( 2+(win-1)*5 )

               iwork( 3+(win-2)*5 ) = iwork( 3+(win-1)*5 )

               iwork( 4+(win-2)*5 ) = iwork( 4+(win-1)*5 )

 198        CONTINUE

            anmwin = anmwin - 1

            ipiw = 6+(anmwin-1)*5

         END IF

*

*        If we have no more windows, return.

*

         IF( anmwin.LT.1 ) RETURN

*

*        Check for any more windows to bring over the border.

*

         winfin = 0

         DO 199 win = 1, anmwin

            winfin = winfin+iwork( 5+(win-1)*5 )

 199     CONTINUE

         IF( winfin.LT.2*anmwin ) GO TO 137

*

*        Zero out process mark for each window - this is legal now when

*        the process starts over with local bulge-chasing etc.

*

         DO 201 win = 1, anmwin

            iwork( 5+(win-1)*5 ) = 0

 201     CONTINUE

*

      END IF

*

*     Go back to local bulge-chase and see if there is more work to do.

*

      GO TO 20

*

*     End of PDLAQR5

*


      END

dlaqr6
subroutine dlaqr6(job, wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, ldh, iloz, ihiz, z, ldz, v, ldv, u, ldu, nv, wv, ldwv, nh, wh, ldwh)
Definition dlaqr6.f:4

infog2l
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pdelset
subroutine pdelset(a, ia, ja, desca, alpha)
Definition pdelset.f:2

pdlaqr5
subroutine pdlaqr5(wantt, wantz, kacc22, n, ktop, kbot, nshfts, sr, si, h, desch, iloz, ihiz, z, descz, work, lwork, iwork, liwork)
Definition pdlaqr5.f:4