dc/d4c/pslahqr_8f_source.html

      SUBROUTINE pslahqr( WANTT, WANTZ, N, ILO, IHI, A, DESCA, WR, WI,

     $                    ILOZ, IHIZ, Z, DESCZ, WORK, LWORK, IWORK,

     $                    ILWORK, INFO )

*

*  -- ScaLAPACK routine (version 2.0.2) --

*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver

*     May 1 2012

*

*     .. Scalar Arguments ..

      LOGICAL            WANTT, WANTZ

      INTEGER            IHI, IHIZ, ILO, ILOZ, ILWORK, INFO, LWORK, N

*     ..

*     .. Array Arguments ..

      INTEGER            DESCA( * ), DESCZ( * ), IWORK( * )

      REAL               A( * ), WI( * ), WORK( * ), WR( * ), Z( * )

*     ..

*

*  Purpose

*  =======

*

*  PSLAHQR is an auxiliary routine used to find the Schur decomposition

*    and/or eigenvalues of a matrix already in Hessenberg form from

*    cols ILO to IHI.

*

*  Notes

*  =====

*

*  Each global data object is described by an associated description

*  vector.  This vector stores the information required to establish

*  the mapping between an object element and its corresponding process

*  and memory location.

*

*  Let A be a generic term for any 2D block cyclicly distributed array.

*  Such a global array has an associated description vector DESCA.

*  In the following comments, the character _ should be read as

*  "of the global array".

*

*  NOTATION        STORED IN      EXPLANATION

*  --------------- -------------- --------------------------------------

*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,

*                                 DTYPE_A = 1.

*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating

*                                 the BLACS process grid A is distribu-

*                                 ted over. The context itself is glo-

*                                 bal, but the handle (the integer

*                                 value) may vary.

*  M_A    (global) DESCA( M_ )    The number of rows in the global

*                                 array A.

*  N_A    (global) DESCA( N_ )    The number of columns in the global

*                                 array A.

*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute

*                                 the rows of the array.

*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute

*                                 the columns of the array.

*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first

*                                 row of the array A is distributed.

*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the

*                                 first column of the array A is

*                                 distributed.

*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local

*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).

*

*  Let K be the number of rows or columns of a distributed matrix,

*  and assume that its process grid has dimension p x q.

*  LOCr( K ) denotes the number of elements of K that a process

*  would receive if K were distributed over the p processes of its

*  process column.

*  Similarly, LOCc( K ) denotes the number of elements of K that a

*  process would receive if K were distributed over the q processes of

*  its process row.

*  The values of LOCr() and LOCc() may be determined via a call to the

*  ScaLAPACK tool function, NUMROC:

*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),

*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).

*  An upper bound for these quantities may be computed by:

*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A

*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A

*

*  Arguments

*  =========

*

*  WANTT   (global input) LOGICAL

*          = .TRUE. : the full Schur form T is required;

*          = .FALSE.: only eigenvalues are required.

*

*  WANTZ   (global input) LOGICAL

*          = .TRUE. : the matrix of Schur vectors Z is required;

*          = .FALSE.: Schur vectors are not required.

*

*  N       (global input) INTEGER

*          The order of the Hessenberg matrix A (and Z if WANTZ).

*          N >= 0.

*

*  ILO     (global input) INTEGER

*  IHI     (global input) INTEGER

*          It is assumed that A is already upper quasi-triangular in

*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless

*          ILO = 1). PSLAHQR works primarily with the Hessenberg

*          submatrix in rows and columns ILO to IHI, but applies

*          transformations to all of H if WANTT is .TRUE..

*          1 <= ILO <= max(1,IHI); IHI <= N.

*

*  A       (global input/output) REAL array, dimension

*          (DESCA(LLD_),*)

*          On entry, the upper Hessenberg matrix A.

*          On exit, if WANTT is .TRUE., A is upper quasi-triangular in

*          rows and columns ILO:IHI, with any 2-by-2 or larger diagonal

*          blocks not yet in standard form. If WANTT is .FALSE., the

*          contents of A are unspecified on exit.

*

*  DESCA   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix A.

*

*  WR      (global replicated output) REAL array,

*                                                         dimension (N)

*  WI      (global replicated output) REAL array,

*                                                         dimension (N)

*          The real and imaginary parts, respectively, of the computed

*          eigenvalues ILO to IHI are stored in the corresponding

*          elements of WR and WI. If two eigenvalues are computed as a

*          complex conjugate pair, they are stored in consecutive

*          elements of WR and WI, say the i-th and (i+1)th, with

*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the

*          eigenvalues are stored in the same order as on the diagonal

*          of the Schur form returned in A.  A may be returned with

*          larger diagonal blocks until the next release.

*

*  ILOZ    (global input) INTEGER

*  IHIZ    (global input) INTEGER

*          Specify the rows of Z to which transformations must be

*          applied if WANTZ is .TRUE..

*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.

*

*  Z       (global input/output) REAL array.

*          If WANTZ is .TRUE., on entry Z must contain the current

*          matrix Z of transformations accumulated by PSHSEQR, and on

*          exit Z has been updated; transformations are applied only to

*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).

*          If WANTZ is .FALSE., Z is not referenced.

*

*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the distributed matrix Z.

*

*  WORK    (local output) REAL array of size LWORK

*

*  LWORK   (local input) INTEGER

*          WORK(LWORK) is a local array and LWORK is assumed big enough

*          so that LWORK >= 3*N +

*                MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N),

*                     7*Ceil(N/HBL)/LCM(NPROW,NPCOL) )

*

*  IWORK   (global and local input) INTEGER array of size ILWORK

*

*  ILWORK  (local input) INTEGER

*          This holds some of the IBLK integer arrays.  This is held

*          as a place holder for the next release.

*

*  INFO    (global output) INTEGER

*          < 0: parameter number -INFO incorrect or inconsistent

*          = 0: successful exit

*          > 0: PSLAHQR failed to compute all the eigenvalues ILO to IHI

*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,

*               elements i+1:ihi of WR and WI contain those eigenvalues

*               which have been successfully computed.

*

*  Logic:

*       This algorithm is very similar to _LAHQR.  Unlike _LAHQR,

*       instead of sending one double shift through the largest

*       unreduced submatrix, this algorithm sends multiple double shifts

*       and spaces them apart so that there can be parallelism across

*       several processor row/columns.  Another critical difference is

*       that this algorithm aggregates multiple transforms together in

*       order to apply them in a block fashion.

*

*  Important Local Variables:

*       IBLK = The maximum number of bulges that can be computed.

*           Currently fixed.  In future releases this won't be fixed.

*       HBL  = The square block size (HBL=DESCA(MB_)=DESCA(NB_))

*       ROTN = The number of transforms to block together

*       NBULGE = The number of bulges that will be attempted on the

*           current submatrix.

*       IBULGE = The current number of bulges started.

*       K1(*),K2(*) = The current bulge loops from K1(*) to K2(*).

*

*  Subroutines:

*       This routine calls:

*           PSLACONSB   -> To determine where to start each iteration

*           PSLAWIL   -> Given the shift, get the transformation

*           SLASORTE   -> Pair up eigenvalues so that reals are paired.

*           PSLACP3   -> Parallel array to local replicated array copy &

*                        back.

*           SLAREF   -> Row/column reflector applier.  Core routine

*                        here.

*           PSLASMSUB   -> Finds negligible subdiagonal elements.

*

*  Current Notes and/or Restrictions:

*       1.) This code requires the distributed block size to be square

*           and at least six (6); unlike simpler codes like LU, this

*           algorithm is extremely sensitive to block size.  Unwise

*           choices of too small a block size can lead to bad

*           performance.  NOTE: the argument check in the code only

*           rejects HBL < 5, not HBL < 6.

*       2.) This code requires A and Z to be distributed identically

*           and have identical contexts.

*       3.) This release currently does not have a routine for

*           resolving the Schur blocks into regular 2x2 form after

*           this code is completed.  Because of this, a significant

*           performance penalty is incurred while the deflation is

*           sometimes done by a single column of processors.

*       4.) This code does not currently block the initial transforms

*           so that none of the rows or columns for any bulge are

*           completed until all are started.  To offset pipeline

*           start-up it is recommended that at least 2*LCM(NPROW,NPCOL)

*           bulges are used (if possible)

*       5.) The maximum number of bulges currently supported is fixed at

*           32.  In future versions this will be limited only by the

*           incoming WORK array.

*       6.) The matrix A must be in upper Hessenberg form.  If elements

*           below the subdiagonal are nonzero, the resulting transforms

*           may be nonsimilar.  This is also true with the LAPACK

*           routine.

*       7.) For this release, it is assumed RSRC_=CSRC_=0

*       8.) Currently, all the eigenvalues are distributed to all the

*           nodes.  Future releases will probably distribute the

*           eigenvalues by the column partitioning.

*       9.) The internals of this routine are subject to change.

*

*  Implemented by:  G. Henry, November 17, 1996

*

*  =====================================================================

*

*     .. Parameters ..

      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,

     $                   LLD_, MB_, M_, NB_, N_, RSRC_

      parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9 )

      REAL               ZERO, ONE, HALF

      PARAMETER          ( ZERO = 0.0, one = 1.0, half = 0.5 )

      REAL               CONST

      parameter( const = 1.50 )

      INTEGER            IBLK

      parameter( iblk = 32 )

*     ..

*     .. Local Scalars ..

      INTEGER            CONTXT, DOWN, HBL, I, I1, I2, IAFIRST, IBULGE,

     $                   ICBUF, ICOL, ICOL1, ICOL2, IDIA, IERR, II,

     $                   irbuf, irow, irow1, irow2, ispec, istart,

     $                   istartcol, istartrow, istop, isub, isup,

     $                   itermax, itmp1, itmp2, itn, its, j, jafirst,

     $                   jblk, jj, k, ki, l, lcmrc, lda, ldz, left,

     $                   lihih, lihiz, liloh, liloz, locali1, locali2,

     $                   localk, localm, m, modkm1, mycol, myrow,

     $                   nbulge, nh, node, npcol, nprow, nr, num, nz,

     $                   right, rotn, up, vecsidx

      REAL               AVE, DISC, H00, H10, H11, H12, H21, H22, H33,

     $                   H43H34, H44, OVFL, S, SMLNUM, SUM, T1, T1COPY,

     $                   t2, t3, ulp, unfl, v1save, v2, v2save, v3,

     $                   v3save, cs, sn

*     ..

*     .. Local Arrays ..

      INTEGER            ICURCOL( IBLK ), ICURROW( IBLK ), K1( IBLK ),

     $                   K2( IBLK ), KCOL( IBLK ), KP2COL( IBLK ),

     $                   kp2row( iblk ), krow( iblk ), localk2( iblk )

      REAL               S1( 2*IBLK, 2*IBLK ), SMALLA( 6, 6, IBLK ),

     $                   VCOPY( 3 )

*     ..

*     .. External Functions ..

      INTEGER            ILCM, NUMROC

      REAL               PSLAMCH

      EXTERNAL           ilcm, numroc, pslamch

*     ..

*     .. External Subroutines ..

      EXTERNAL           blacs_gridinfo, scopy, sgebr2d, sgebs2d,

     $                   sgerv2d, sgesd2d, sgsum2d, slahqr, slaref,

     $                   slarfg, slasorte, igamn2d, infog1l, infog2l,

     $                   pslabad, pslaconsb, pslacp3, pslasmsub,

     $                   pslawil, pxerbla, slanv2

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, max, min, mod, sign, sqrt

*     ..

*     .. Executable Statements ..

*

      info = 0

*

      itermax = 30*( ihi-ilo+1 )

*     ITERMAX = 0

      IF( n.EQ.0 )

     $   RETURN

*

*     NODE (IAFIRST,JAFIRST) OWNS A(1,1)

*

      hbl = desca( mb_ )

      contxt = desca( ctxt_ )

      lda = desca( lld_ )

      iafirst = desca( rsrc_ )

      jafirst = desca( csrc_ )

      ldz = descz( lld_ )

      CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )

      node = myrow*npcol + mycol

      num = nprow*npcol

      left = mod( mycol+npcol-1, npcol )

      right = mod( mycol+1, npcol )

      up = mod( myrow+nprow-1, nprow )

      down = mod( myrow+1, nprow )

      lcmrc = ilcm( nprow, npcol )

*

*     Determine the number of columns we have so we can check workspace

*

      localk = numroc( n, hbl, mycol, jafirst, npcol )

      jj = n / hbl

      IF( jj*hbl.LT.n )

     $   jj = jj + 1

      jj = 7*jj / lcmrc

      IF( lwork.LT.3*n+max( 2*max( lda, ldz )+2*localk, jj ) ) THEN

         info = -15

      END IF

      IF( descz( ctxt_ ).NE.desca( ctxt_ ) ) THEN

         info = -( 1300+ctxt_ )

      END IF

      IF( desca( mb_ ).NE.desca( nb_ ) ) THEN

         info = -( 700+nb_ )

      END IF

      IF( descz( mb_ ).NE.descz( nb_ ) ) THEN

         info = -( 1300+nb_ )

      END IF

      IF( desca( mb_ ).NE.descz( mb_ ) ) THEN

         info = -( 1300+mb_ )

      END IF

      IF( ( desca( rsrc_ ).NE.0 ) .OR. ( desca( csrc_ ).NE.0 ) ) THEN

         info = -( 700+rsrc_ )

      END IF

      IF( ( descz( rsrc_ ).NE.0 ) .OR. ( descz( csrc_ ).NE.0 ) ) THEN

         info = -( 1300+rsrc_ )

      END IF

      IF( ( ilo.GT.n ) .OR. ( ilo.LT.1 ) ) THEN

         info = -4

      END IF

      IF( ( ihi.GT.n ) .OR. ( ihi.LT.1 ) ) THEN

         info = -5

      END IF

      IF( hbl.LT.5 ) THEN

         info = -( 700+mb_ )

      END IF

      CALL igamn2d( contxt, 'ALL', ' ', 1, 1, info, 1, itmp1, itmp2, -1,

     $              -1, -1 )

      IF( info.LT.0 ) THEN

         CALL pxerbla( contxt, 'PSLAHQR', -info )

         RETURN

      END IF

*

*     Set work array indices

*

      vecsidx = 0

      idia = 3*n

      isub = 3*n

      isup = 3*n

      irbuf = 3*n

      icbuf = 3*n

*

*     Find a value for ROTN

*

      rotn = hbl / 3

      rotn = max( rotn, hbl-2 )

      rotn = min( rotn, 1 )

*

      IF( ilo.EQ.ihi ) THEN

         CALL infog2l( ilo, ilo, desca, nprow, npcol, myrow, mycol,

     $                 irow, icol, ii, jj )

         IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN

            wr( ilo ) = a( ( icol-1 )*lda+irow )

         ELSE

            wr( ilo ) = zero

         END IF

         wi( ilo ) = zero

         RETURN

      END IF

*

      nh = ihi - ilo + 1

      nz = ihiz - iloz + 1

*

      CALL infog1l( iloz, hbl, nprow, myrow, 0, liloz, lihiz )

      lihiz = numroc( ihiz, hbl, myrow, 0, nprow )

*

*     Set machine-dependent constants for the stopping criterion.

*     If NORM(H) <= SQRT(OVFL), overflow should not occur.

*

      unfl = pslamch( contxt, 'SAFE MINIMUM' )

      ovfl = one / unfl

      CALL pslabad( contxt, unfl, ovfl )

      ulp = pslamch( contxt, 'PRECISION' )

      smlnum = unfl*( nh / ulp )

*

*     I1 and I2 are the indices of the first row and last column of H

*     to which transformations must be applied. If eigenvalues only are

*     being computed, I1 and I2 are set inside the main loop.

*

      IF( wantt ) THEN

         i1 = 1

         i2 = n

      END IF

*

*     ITN is the total number of QR iterations allowed.

*

      itn = itermax

*

*     The main loop begins here. I is the loop index and decreases from

*     IHI to ILO in steps of our Schur block size (<=2*IBLK). Each

*     iteration of the loop works  with the active submatrix in rows

*     and columns L to I.   Eigenvalues I+1 to IHI have already

*     converged. Either L = ILO or the global A(L,L-1) is negligible

*     so that the matrix splits.

*

      i = ihi

   10 CONTINUE

      l = ilo

      IF( i.LT.ilo )

     $   GO TO 450

*

*     Perform QR iterations on rows and columns ILO to I until a

*     submatrix of order 1 or 2 splits off at the bottom because a

*     subdiagonal element has become negligible.

*

      DO 420 its = 0, itn

*

*        Look for a single small subdiagonal element.

*

         CALL pslasmsub( a, desca, i, l, k, smlnum, work( irbuf+1 ),

     $                   lwork-irbuf )

         l = k

*

         IF( l.GT.ilo ) THEN

*

*           H(L,L-1) is negligible

*

            CALL infog2l( l, l-1, desca, nprow, npcol, myrow, mycol,

     $                    irow, icol, itmp1, itmp2 )

            IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN

               a( ( icol-1 )*lda+irow ) = zero

            END IF

            work( isub+l-1 ) = zero

         END IF

*

*        Exit from loop if a submatrix of order 1 or 2 has split off.

*

         m = l - 10

*        IF ( L .GE. I - (2*IBLK-1) )

*         IF ( L .GE. I - MAX(2*IBLK-1,HBL) )

         IF( l.GE.i-1 )

     $      GO TO 430

*

*        Now the active submatrix is in rows and columns L to I. If

*        eigenvalues only are being computed, only the active submatrix

*        need be transformed.

*

         IF( .NOT.wantt ) THEN

            i1 = l

            i2 = i

         END IF

*

*        Copy submatrix of size 2*JBLK and prepare to do generalized

*           Wilkinson shift or an exceptional shift

*

         jblk = min( iblk, ( ( i-l+1 ) / 2 )-1 )

         IF( jblk.GT.lcmrc ) THEN

*

*           Make sure it's divisible by LCM (we want even workloads!)

*

            jblk = jblk - mod( jblk, lcmrc )

         END IF

         jblk = min( jblk, 2*lcmrc )

         jblk = max( jblk, 1 )

*

         CALL pslacp3( 2*jblk, i-2*jblk+1, a, desca, s1, 2*iblk, -1, -1,

     $                 0 )

         IF( its.EQ.20 .OR. its.EQ.40 ) THEN

*

*           Exceptional shift.

*

            DO 20 ii = 2*jblk, 2, -1

               s1( ii, ii ) = const*( abs( s1( ii, ii ) )+

     $                        abs( s1( ii, ii-1 ) ) )

               s1( ii, ii-1 ) = zero

               s1( ii-1, ii ) = zero

   20       CONTINUE

            s1( 1, 1 ) = const*abs( s1( 1, 1 ) )

         ELSE

            CALL slahqr( .false., .false., 2*jblk, 1, 2*jblk, s1,

     $                   2*iblk, work( irbuf+1 ), work( icbuf+1 ), 1,

     $                   2*jblk, z, ldz, ierr )

*

*           Prepare to use Wilkinson's double shift

*

            h44 = s1( 2*jblk, 2*jblk )

            h33 = s1( 2*jblk-1, 2*jblk-1 )

            h43h34 = s1( 2*jblk-1, 2*jblk )*s1( 2*jblk, 2*jblk-1 )

            IF( ( jblk.GT.1 ) .AND. ( its.GT.30 ) ) THEN

               s = s1( 2*jblk-1, 2*jblk-2 )

               disc = ( h33-h44 )*half

               disc = disc*disc + h43h34

               IF( disc.GT.zero ) THEN

*

*                 Real roots: Use Wilkinson's shift twice

*

                  disc = sqrt( disc )

                  ave = half*( h33+h44 )

                  IF( abs( h33 )-abs( h44 ).GT.zero ) THEN

                     h33 = h33*h44 - h43h34

                     h44 = h33 / ( sign( disc, ave )+ave )

                  ELSE

                     h44 = sign( disc, ave ) + ave

                  END IF

                  h33 = h44

                  h43h34 = zero

               END IF

            END IF

         END IF

*

*        Look for two consecutive small subdiagonal elements:

*           PSLACONSB is the routine that does this.

*

c         CALL PSLACONSB( A, DESCA, I, L, M, H44, H33, H43H34,

c     $                   WORK( IRBUF+1 ), LWORK-IRBUF )

*

*        Skip small submatrices

*

*        IF ( M .GE. I - 5 )

*    $      GO TO 80

*

*        In principle PSLACONSB needs to check all shifts to decide

*        whether two consecutive small subdiagonal entries are suitable

*        as the starting position of the bulge chasing phase. It can be

*        dangerous to check the first pair of shifts only. Moreover it

*        is quite rare to obtain an M which is much larger than L. This

*        process is a bit expensive compared with the benefit.

*        Therefore it is sensible to abandon this routine. Total amount

*        of communications is saved in average.

*

         m = l

*        Double-shift QR step

*

*        NBULGE is the number of bulges that will be attempted

*

         istop = min( m+rotn-mod( m, rotn ), i-2 )

         istop = min( istop, m+hbl-3-mod( m-1, hbl ) )

         istop = min( istop, i2-2 )

         istop = max( istop, m )

         nbulge = ( i-1-istop ) / hbl

*

*        Do not exceed maximum determined.

*

         nbulge = min( nbulge, jblk )

         IF( nbulge.GT.lcmrc ) THEN

*

*           Make sure it's divisible by LCM (we want even workloads!)

*

            nbulge = nbulge - mod( nbulge, lcmrc )

         END IF

         nbulge = max( nbulge, 1 )

*

         IF( ( its.NE.20 ) .AND. ( its.NE.40 ) .AND. ( nbulge.GT.1 ) )

     $        THEN

*

*           sort the eigenpairs so that they are in twos for double

*           shifts.  only call if several need sorting

*

            CALL slasorte( s1( 2*( jblk-nbulge )+1,

     $                     2*( jblk-nbulge )+1 ), 2*iblk, 2*nbulge,

     $                     work( irbuf+1 ), ierr )

         END IF

*

*        IBULGE is the number of bulges going so far

*

         ibulge = 1

*

*        "A" row defs : main row transforms from LOCALK to LOCALI2

*

         CALL infog1l( m, hbl, npcol, mycol, 0, itmp1, localk )

         localk = numroc( n, hbl, mycol, 0, npcol )

         CALL infog1l( 1, hbl, npcol, mycol, 0, icol1, locali2 )

         locali2 = numroc( i2, hbl, mycol, 0, npcol )

*

*        "A" col defs : main col transforms from LOCALI1 to LOCALM

*

         CALL infog1l( i1, hbl, nprow, myrow, 0, locali1, icol1 )

         icol1 = numroc( n, hbl, myrow, 0, nprow )

         CALL infog1l( 1, hbl, nprow, myrow, 0, localm, icol1 )

         icol1 = numroc( min( m+3, i ), hbl, myrow, 0, nprow )

*

*        Which row & column will start the bulges

*

         istartrow = mod( ( m+1 ) / hbl, nprow ) + iafirst

         istartcol = mod( ( m+1 ) / hbl, npcol ) + jafirst

*

         CALL infog1l( m, hbl, nprow, myrow, 0, ii, itmp2 )

         itmp2 = numroc( n, hbl, myrow, 0, nprow )

         CALL infog1l( m, hbl, npcol, mycol, 0, jj, itmp2 )

         itmp2 = numroc( n, hbl, mycol, 0, npcol )

         CALL infog1l( 1, hbl, nprow, myrow, 0, istop, kp2row( 1 ) )

         kp2row( 1 ) = numroc( m+2, hbl, myrow, 0, nprow )

         CALL infog1l( 1, hbl, npcol, mycol, 0, istop, kp2col( 1 ) )

         kp2col( 1 ) = numroc( m+2, hbl, mycol, 0, npcol )

*

*        Set all values for bulges.  All bulges are stored in

*          intermediate steps as loops over KI.  Their current "task"

*          over the global M to I-1 values is always K1(KI) to K2(KI).

*          However, because there are many bulges, K1(KI) & K2(KI) might

*          go past that range while later bulges (KI+1,KI+2,etc..) are

*          finishing up.

*

*        Rules:

*              If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2

*              If MOD(K1(KI)-1,HBL) = HBL-2 then MOD(K2(KI)-1,HBL)=HBL-2

*              If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1

*              K2(KI)-K1(KI) <= ROTN

*

*        We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit

*        it again when MOD(K1(KI)-1,HBL)=HBL-1.

*

         DO 30 ki = 1, nbulge

            k1( ki ) = m

            istop = min( m+rotn-mod( m, rotn ), i-2 )

            istop = min( istop, m+hbl-3-mod( m-1, hbl ) )

            istop = min( istop, i2-2 )

            istop = max( istop, m )

            k2( ki ) = istop

            icurrow( ki ) = istartrow

            icurcol( ki ) = istartcol

            localk2( ki ) = itmp1

            krow( ki ) = ii

            kcol( ki ) = jj

            IF( ki.GT.1 )

     $         kp2row( ki ) = kp2row( 1 )

            IF( ki.GT.1 )

     $         kp2col( ki ) = kp2col( 1 )

   30    CONTINUE

*

*        Get first transform on node who owns M+2,M+2

*

         DO 31 itmp1 = 1, 3

            vcopy(itmp1) = zero

   31    CONTINUE

         itmp1 = istartrow

         itmp2 = istartcol

         CALL pslawil( itmp1, itmp2, m, a, desca, h44, h33, h43h34,

     $                 vcopy )

         v1save = vcopy( 1 )

         v2save = vcopy( 2 )

         v3save = vcopy( 3 )

         IF( k2( ibulge ).LE.i-1 ) THEN

   40       CONTINUE

            IF( ( k1( ibulge ).GE.m+5 ) .AND. ( ibulge.LT.nbulge ) )

     $           THEN

               IF( ( mod( k2( ibulge )+2, hbl ).EQ.mod( k2( ibulge+1 )+

     $             2, hbl ) ) .AND. ( k1( 1 ).LE.i-1 ) ) THEN

                  h44 = s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge )

                  h33 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge-1 )

                  h43h34 = s1( 2*jblk-2*ibulge-1, 2*jblk-2*ibulge )*

     $                     s1( 2*jblk-2*ibulge, 2*jblk-2*ibulge-1 )

                  itmp1 = istartrow

                  itmp2 = istartcol

                  CALL pslawil( itmp1, itmp2, m, a, desca, h44, h33,

     $                          h43h34, vcopy )

                  v1save = vcopy( 1 )

                  v2save = vcopy( 2 )

                  v3save = vcopy( 3 )

                  ibulge = ibulge + 1

               END IF

            END IF

*

*        When we hit a border, there are row and column transforms that

*          overlap over several processors and the code gets very

*          "congested."  As a remedy, when we first hit a border, a 6x6

*          *local* matrix is generated on one node (called SMALLA) and

*          work is done on that.  At the end of the border, the data is

*          passed back and everything stays a lot simpler.

*

            DO 80 ki = 1, ibulge

*

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               k = istart

               modkm1 = mod( k-1, hbl )

               IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN

                  DO 81 itmp1 = 1, 6

                     DO 82 itmp2 = 1, 6

                        smalla(itmp1, itmp2, ki) = zero

   82                CONTINUE

   81             CONTINUE

                  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN

*

*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4)

*

                     CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+2 ), k-1, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             0 )

                  END IF

                  IF( modkm1.EQ.hbl-1 ) THEN

*

*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3)

*

                     CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+3 ), k-2, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             0 )

                  END IF

               END IF

*

*           SLAHQR used to have a single row application and a single

*              column application to H.  Here we do something a little

*              more clever.  We break each transformation down into 3

*              parts:

*                  1.) The minimum amount of work it takes to determine

*                        a group of ROTN transformations (this is on

*                        the critical path.) (Loops 130-180)

*                  2.) The small work it takes so that each of the rows

*                        and columns is at the same place.  For example,

*                        all ROTN row transforms are all complete

*                        through some column TMP.  (Loops within 190)

*                  3.) The majority of the row and column transforms

*                        are then applied in a block fashion.

*                        (Loops 290 on.)

*

*           Each of these three parts is further subdivided into 3

*           parts:

*               A.) Work at the start of a border when

*                       MOD(ISTART-1,HBL) = HBL-2

*               B.) Work at the end of a border when

*                       MOD(ISTART-1,HBL) = HBL-1

*               C.) Work in the middle of the block when

*                       MOD(ISTART-1,HBL) < HBL-2

*

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( modkm1.EQ.hbl-2 ) .AND.

     $             ( istart.LT.min( i-1, istop+1 ) ) ) THEN

                  k = istart

                  nr = min( 3, i-k+1 )

                  IF( k.GT.m ) THEN

                     CALL scopy( nr, smalla( 2, 1, ki ), 1, vcopy, 1 )

                  ELSE

                     vcopy( 1 ) = v1save

                     vcopy( 2 ) = v2save

                     vcopy( 3 ) = v3save

                  END IF

                  CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )

                  IF( k.GT.m ) THEN

                     smalla( 2, 1, ki ) = vcopy( 1 )

                     smalla( 3, 1, ki ) = zero

                     IF( k.LT.i-1 )

     $                  smalla( 4, 1, ki ) = zero

                  ELSE IF( m.GT.l ) THEN

                     smalla( 2, 1, ki ) = -smalla( 2, 1, ki )

                  END IF

                  v2 = vcopy( 2 )

                  t2 = t1copy*v2

                  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )

                  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )

                  work( vecsidx+( k-1 )*3+3 ) = t1copy

               END IF

*

               IF( ( mod( istop-1, hbl ).EQ.hbl-1 ) .AND.

     $             ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( istart.LE.min( i, istop ) ) ) THEN

                  k = istart

                  nr = min( 3, i-k+1 )

                  IF( k.GT.m ) THEN

                     CALL scopy( nr, smalla( 3, 2, ki ), 1, vcopy, 1 )

                  ELSE

                     vcopy( 1 ) = v1save

                     vcopy( 2 ) = v2save

                     vcopy( 3 ) = v3save

                  END IF

                  CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1, t1copy )

                  IF( k.GT.m ) THEN

                     smalla( 3, 2, ki ) = vcopy( 1 )

                     smalla( 4, 2, ki ) = zero

                     IF( k.LT.i-1 )

     $                  smalla( 5, 2, ki ) = zero

*

*                 Set a subdiagonal to zero now if it's possible

*

*                 H11 = SMALLA(1,1,KI)

*                 H10 = SMALLA(2,1,KI)

*                 H22 = SMALLA(2,2,KI)

*                 IF ( ABS(H10) .LE. MAX(ULP*(ABS(H11)+ABS(H22)),

*    $                                    SMLNUM) ) THEN

*                    SMALLA(2,1,KI) = ZERO

*     WORK(ISUB+K-2) = ZERO

*                 END IF

                  ELSE IF( m.GT.l ) THEN

                     smalla( 3, 2, ki ) = -smalla( 3, 2, ki )

                  END IF

                  v2 = vcopy( 2 )

                  t2 = t1copy*v2

                  work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )

                  work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )

                  work( vecsidx+( k-1 )*3+3 ) = t1copy

               END IF

*

               IF( ( modkm1.EQ.0 ) .AND. ( istart.LE.i-1 ) .AND.

     $             ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( right.EQ.icurcol( ki ) ) ) THEN

*

*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)

*

                  irow1 = krow( ki )

                  icol1 = localk2( ki )

                  IF( istart.GT.m ) THEN

                     vcopy( 1 ) = smalla( 4, 3, ki )

                     vcopy( 2 ) = smalla( 5, 3, ki )

                     vcopy( 3 ) = smalla( 6, 3, ki )

                     nr = min( 3, i-istart+1 )

                     CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,

     $                            t1copy )

                     a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )

                     a( ( icol1-2 )*lda+irow1+1 ) = zero

                     IF( istart.LT.i-1 ) THEN

                        a( ( icol1-2 )*lda+irow1+2 ) = zero

                     END IF

                  ELSE

                     IF( m.GT.l ) THEN

                        a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*

     $                     lda+irow1 )

                     END IF

                  END IF

               END IF

*

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-

     $             1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-

     $             1 ) ) ) ) THEN

*

*           (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)

*

                  irow1 = krow( ki )

                  icol1 = localk2( ki )

                  DO 70 k = istart, istop

*

*              Create and do these transforms

*

                     nr = min( 3, i-k+1 )

                     IF( k.GT.m ) THEN

                        IF( mod( k-1, hbl ).EQ.0 ) THEN

                           vcopy( 1 ) = smalla( 4, 3, ki )

                           vcopy( 2 ) = smalla( 5, 3, ki )

                           vcopy( 3 ) = smalla( 6, 3, ki )

                        ELSE

                           vcopy( 1 ) = a( ( icol1-2 )*lda+irow1 )

                           vcopy( 2 ) = a( ( icol1-2 )*lda+irow1+1 )

                           IF( nr.EQ.3 ) THEN

                              vcopy( 3 ) = a( ( icol1-2 )*lda+irow1+2 )

                           END IF

                        END IF

                     ELSE

                        vcopy( 1 ) = v1save

                        vcopy( 2 ) = v2save

                        vcopy( 3 ) = v3save

                     END IF

                     CALL slarfg( nr, vcopy( 1 ), vcopy( 2 ), 1,

     $                            t1copy )

                     IF( k.GT.m ) THEN

                        IF( mod( k-1, hbl ).GT.0 ) THEN

                           a( ( icol1-2 )*lda+irow1 ) = vcopy( 1 )

                           a( ( icol1-2 )*lda+irow1+1 ) = zero

                           IF( k.LT.i-1 ) THEN

                              a( ( icol1-2 )*lda+irow1+2 ) = zero

                           END IF

*

*                    Set a subdiagonal to zero now if it's possible

*

*                    IF ( (IROW1.GT.2) .AND. (ICOL1.GT.2) .AND.

*    $                    (MOD(K-1,HBL) .GT. 1) ) THEN

*                       H11 = A((ICOL1-3)*LDA+IROW1-2)

*                       H10 = A((ICOL1-3)*LDA+IROW1-1)

*                       H22 = A((ICOL1-2)*LDA+IROW1-1)

*                       IF ( ABS(H10).LE.MAX(ULP*(ABS(H11)+ABS(H22)),

*    $                                       SMLNUM) ) THEN

*                           A((ICOL1-3)*LDA+IROW1-1) = ZERO

*                       END IF

*                    END IF

                        END IF

                     ELSE IF( m.GT.l ) THEN

                        IF( mod( k-1, hbl ).GT.0 ) THEN

                           a( ( icol1-2 )*lda+irow1 ) = -a( ( icol1-2 )*

     $                        lda+irow1 )

                        END IF

                     END IF

                     v2 = vcopy( 2 )

                     t2 = t1copy*v2

                     work( vecsidx+( k-1 )*3+1 ) = vcopy( 2 )

                     work( vecsidx+( k-1 )*3+2 ) = vcopy( 3 )

                     work( vecsidx+( k-1 )*3+3 ) = t1copy

                     t1 = t1copy

                     IF( k.LT.istop ) THEN

*

*                 Do some work so next step is ready...

*

                        v3 = vcopy( 3 )

                        t3 = t1*v3

                        DO 50 j = icol1, min( k2( ki )+1, i-1 ) +

     $                          icol1 - k

                           sum = a( ( j-1 )*lda+irow1 ) +

     $                           v2*a( ( j-1 )*lda+irow1+1 ) +

     $                           v3*a( ( j-1 )*lda+irow1+2 )

                           a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*lda+

     $                        irow1 ) - sum*t1

                           a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*lda+

     $                        irow1+1 ) - sum*t2

                           a( ( j-1 )*lda+irow1+2 ) = a( ( j-1 )*lda+

     $                        irow1+2 ) - sum*t3

   50                   CONTINUE

                        itmp1 = localk2( ki )

                        DO 60 j = irow1 + 1, irow1 + 3

                           sum = a( ( icol1-1 )*lda+j ) +

     $                           v2*a( icol1*lda+j ) +

     $                           v3*a( ( icol1+1 )*lda+j )

                           a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*lda+

     $                        j ) - sum*t1

                           a( icol1*lda+j ) = a( icol1*lda+j ) - sum*t2

                           a( ( icol1+1 )*lda+j ) = a( ( icol1+1 )*lda+

     $                        j ) - sum*t3

   60                   CONTINUE

                     END IF

                     irow1 = irow1 + 1

                     icol1 = icol1 + 1

   70             CONTINUE

               END IF

*

               IF( modkm1.EQ.hbl-2 ) THEN

                  IF( ( down.EQ.icurrow( ki ) ) .AND.

     $                ( right.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )

     $                 THEN

                     CALL sgerv2d( contxt, 3, 1,

     $                             work( vecsidx+( istart-1 )*3+1 ), 3,

     $                             down, right )

                  END IF

                  IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $                ( mycol.EQ.icurcol( ki ) ) .AND. ( num.GT.1 ) )

     $                 THEN

                     CALL sgesd2d( contxt, 3, 1,

     $                             work( vecsidx+( istart-1 )*3+1 ), 3,

     $                             up, left )

                  END IF

                  IF( ( down.EQ.icurrow( ki ) ) .AND.

     $                ( npcol.GT.1 ) .AND. ( istart.LE.istop ) ) THEN

                     jj = mod( icurcol( ki )+npcol-1, npcol )

                     IF( mycol.NE.jj ) THEN

                        CALL sgebr2d( contxt, 'ROW', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ), myrow, jj )

                     ELSE

                        CALL sgebs2d( contxt, 'ROW', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ) )

                     END IF

                  END IF

               END IF

*

*           Broadcast Householder information from the block

*

               IF( ( myrow.EQ.icurrow( ki ) ) .AND. ( npcol.GT.1 ) .AND.

     $             ( istart.LE.istop ) ) THEN

                  IF( mycol.NE.icurcol( ki ) ) THEN

                     CALL sgebr2d( contxt, 'ROW', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ), myrow,

     $                             icurcol( ki ) )

                  ELSE

                     CALL sgebs2d( contxt, 'ROW', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ) )

                  END IF

               END IF

   80       CONTINUE

*

*        Now do column transforms and finish work

*

            DO 90 ki = 1, ibulge

*

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               IF( mod( istart-1, hbl ).EQ.hbl-2 ) THEN

                  IF( ( right.EQ.icurcol( ki ) ) .AND.

     $                ( nprow.GT.1 ) .AND. ( istart.LE.istop ) ) THEN

                     jj = mod( icurrow( ki )+nprow-1, nprow )

                     IF( myrow.NE.jj ) THEN

                        CALL sgebr2d( contxt, 'COL', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ), jj, mycol )

                     ELSE

                        CALL sgebs2d( contxt, 'COL', ' ',

     $                                3*( istop-istart+1 ), 1,

     $                                work( vecsidx+( istart-1 )*3+1 ),

     $                                3*( istop-istart+1 ) )

                     END IF

                  END IF

               END IF

*

               IF( ( mycol.EQ.icurcol( ki ) ) .AND. ( nprow.GT.1 ) .AND.

     $             ( istart.LE.istop ) ) THEN

                  IF( myrow.NE.icurrow( ki ) ) THEN

                     CALL sgebr2d( contxt, 'COL', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ), icurrow( ki ),

     $                             mycol )

                  ELSE

                     CALL sgebs2d( contxt, 'COL', ' ',

     $                             3*( istop-istart+1 ), 1,

     $                             work( vecsidx+( istart-1 )*3+1 ),

     $                             3*( istop-istart+1 ) )

                  END IF

               END IF

   90       CONTINUE

*

*        Now do the catch-up work to keep things in block fashion

*

            DO 150 ki = 1, ibulge

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               modkm1 = mod( istart-1, hbl )

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( modkm1.EQ.hbl-2 ) .AND. ( istart.LT.i-1 ) ) THEN

                  k = istart

*

*              Catch up on column & border work

*

                  nr = min( 3, i-k+1 )

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  IF( nr.EQ.3 ) THEN

*

*                 Do some work so next step is ready...

*

*                 V3 = VCOPY( 3 )

                     t2 = t1*v2

                     t3 = t1*v3

                     itmp1 = min( 6, i2+2-k )

                     itmp2 = max( i1-k+2, 1 )

                     DO 100 j = 2, itmp1

                        sum = smalla( 2, j, ki ) +

     $                        v2*smalla( 3, j, ki ) +

     $                        v3*smalla( 4, j, ki )

                        smalla( 2, j, ki ) = smalla( 2, j, ki ) - sum*t1

                        smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t2

                        smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t3

  100                CONTINUE

                     DO 110 j = itmp2, 5

                        sum = smalla( j, 2, ki ) +

     $                        v2*smalla( j, 3, ki ) +

     $                        v3*smalla( j, 4, ki )

                        smalla( j, 2, ki ) = smalla( j, 2, ki ) - sum*t1

                        smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t2

                        smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t3

  110                CONTINUE

                  END IF

               END IF

*

               IF( ( mod( istart-1, hbl ).EQ.hbl-1 ) .AND.

     $             ( istart.LE.istop ) .AND.

     $             ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) ) THEN

                  k = istop

*

*              Catch up on column & border work

*

                  nr = min( 3, i-k+1 )

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  IF( nr.EQ.3 ) THEN

*

*                 Do some work so next step is ready...

*

*                 V3 = VCOPY( 3 )

                     t2 = t1*v2

                     t3 = t1*v3

                     itmp1 = min( 6, i2-k+3 )

                     itmp2 = max( i1-k+3, 1 )

                     DO 120 j = 3, itmp1

                        sum = smalla( 3, j, ki ) +

     $                        v2*smalla( 4, j, ki ) +

     $                        v3*smalla( 5, j, ki )

                        smalla( 3, j, ki ) = smalla( 3, j, ki ) - sum*t1

                        smalla( 4, j, ki ) = smalla( 4, j, ki ) - sum*t2

                        smalla( 5, j, ki ) = smalla( 5, j, ki ) - sum*t3

  120                CONTINUE

                     DO 130 j = itmp2, 6

                        sum = smalla( j, 3, ki ) +

     $                        v2*smalla( j, 4, ki ) +

     $                        v3*smalla( j, 5, ki )

                        smalla( j, 3, ki ) = smalla( j, 3, ki ) - sum*t1

                        smalla( j, 4, ki ) = smalla( j, 4, ki ) - sum*t2

                        smalla( j, 5, ki ) = smalla( j, 5, ki ) - sum*t3

  130                CONTINUE

                  END IF

               END IF

*

               modkm1 = mod( istart-1, hbl )

               IF( ( myrow.EQ.icurrow( ki ) ) .AND.

     $             ( mycol.EQ.icurcol( ki ) ) .AND.

     $             ( ( ( modkm1.EQ.hbl-2 ) .AND. ( istart.EQ.i-

     $             1 ) ) .OR. ( ( modkm1.LT.hbl-2 ) .AND. ( istart.LE.i-

     $             1 ) ) ) ) THEN

*

*           (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART)

*

                  irow1 = krow( ki )

                  icol1 = localk2( ki )

                  DO 140 k = istart, istop

*

*              Catch up on column & border work

*

                     nr = min( 3, i-k+1 )

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     IF( k.LT.istop ) THEN

*

*                 Do some work so next step is ready...

*

                        t2 = t1*v2

                        t3 = t1*v3

                        CALL slaref( 'Col', a, lda, .false., z, ldz,

     $                               .false., icol1, icol1, istart,

     $                               istop, min( istart+1, i )-k+irow1,

     $                               irow1, liloz, lihiz,

     $                               work( vecsidx+1 ), v2, v3, t1, t2,

     $                               t3 )

                        irow1 = irow1 + 1

                        icol1 = icol1 + 1

                     ELSE

                        IF( ( nr.EQ.3 ) .AND. ( mod( k-1,

     $                      hbl ).LT.hbl-2 ) ) THEN

                           t2 = t1*v2

                           t3 = t1*v3

                           CALL slaref( 'Row', a, lda, .false., z, ldz,

     $                                  .false., irow1, irow1, istart,

     $                                  istop, icol1, min( min( k2( ki )

     $                                  +1, i-1 ), i2 )-k+icol1, liloz,

     $                                  lihiz, work( vecsidx+1 ), v2,

     $                                  v3, t1, t2, t3 )

                        END IF

                     END IF

  140             CONTINUE

               END IF

*

*           Send SMALLA back again.

*

               k = istart

               modkm1 = mod( k-1, hbl )

               IF( ( modkm1.GE.hbl-2 ) .AND. ( k.LE.i-1 ) ) THEN

                  IF( ( modkm1.EQ.hbl-2 ) .AND. ( k.LT.i-1 ) ) THEN

*

*                 Put SMALLA (up to 6x6) back into A(K-1:K+4,K-1:K+4)

*

                     CALL infog2l( k+2, k+2, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+2 ), k-1, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             1 )

*

                  END IF

                  IF( modkm1.EQ.hbl-1 ) THEN

*

*                 Put SMALLA (up to 6x6) back into A(K-2:K+3,K-2:K+3)

*

                     CALL infog2l( k+1, k+1, desca, nprow, npcol, myrow,

     $                             mycol, irow1, icol1, itmp1, itmp2 )

                     CALL pslacp3( min( 6, n-k+3 ), k-2, a, desca,

     $                             smalla( 1, 1, ki ), 6, itmp1, itmp2,

     $                             1 )

                  END IF

               END IF

*

  150       CONTINUE

*

*        Now start major set of block ROW reflections

*

            DO 160 ki = 1, ibulge

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 160

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               IF( ( istop.GT.istart ) .AND.

     $             ( mod( istart-1, hbl ).LT.hbl-2 ) .AND.

     $             ( icurrow( ki ).EQ.myrow ) ) THEN

                  irow1 = min( k2( ki )+1, i-1 ) + 1

                  CALL infog1l( irow1, hbl, npcol, mycol, 0, itmp1,

     $                          itmp2 )

                  itmp2 = numroc( i2, hbl, mycol, 0, npcol )

                  ii = krow( ki )

                  CALL slaref( 'Row', a, lda, wantz, z, ldz, .true., ii,

     $                         ii, istart, istop, itmp1, itmp2, liloz,

     $                         lihiz, work( vecsidx+1 ), v2, v3, t1, t2,

     $                         t3 )

               END IF

  160       CONTINUE

*

            DO 180 ki = 1, ibulge

               IF( krow( ki ).GT.kp2row( ki ) )

     $            GO TO 180

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 180

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( ( istart.EQ.istop ) .OR.

     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.

     $             ( icurrow( ki ).NE.myrow ) ) THEN

                  DO 170 k = istart, istop

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     nr = min( 3, i-k+1 )

                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.

     $                   kp2row( ki ) ) ) THEN

                        IF( ( k.LT.istop ) .AND.

     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                           itmp1 = min( k2( ki )+1, i-1 ) + 1

                        ELSE

                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                              itmp1 = min( k2( ki )+1, i-1 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                              itmp1 = min( k+4, i2 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              itmp1 = min( k+3, i2 ) + 1

                           END IF

                        END IF

*

*                    Find local coordinates of rows K through K+2

*

                        irow1 = krow( ki )

                        irow2 = kp2row( ki )

                        CALL infog1l( itmp1, hbl, npcol, mycol, 0,

     $                                icol1, icol2 )

                        icol2 = numroc( i2, hbl, mycol, 0, npcol )

                        IF( ( mod( k-1, hbl ).LT.hbl-2 ) .OR.

     $                      ( nprow.EQ.1 ) ) THEN

                           t2 = t1*v2

                           t3 = t1*v3

                           CALL slaref( 'Row', a, lda, wantz, z, ldz,

     $                                  .false., irow1, irow1, istart,

     $                                  istop, icol1, icol2, liloz,

     $                                  lihiz, work( vecsidx+1 ), v2,

     $                                  v3, t1, t2, t3 )

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow2 ),

     $                                      lda, up, mycol )

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow1 ),

     $                                      lda, down, mycol )

                           END IF

                        END IF

                     END IF

  170             CONTINUE

               END IF

  180       CONTINUE

*

            DO 220 ki = 1, ibulge

               IF( krow( ki ).GT.kp2row( ki ) )

     $            GO TO 220

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 220

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( ( istart.EQ.istop ) .OR.

     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.

     $             ( icurrow( ki ).NE.myrow ) ) THEN

                  DO 210 k = istart, istop

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     nr = min( 3, i-k+1 )

                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.

     $                   kp2row( ki ) ) ) THEN

                        IF( ( k.LT.istop ) .AND.

     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                           itmp1 = min( k2( ki )+1, i-1 ) + 1

                        ELSE

                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                              itmp1 = min( k2( ki )+1, i-1 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                              itmp1 = min( k+4, i2 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              itmp1 = min( k+3, i2 ) + 1

                           END IF

                        END IF

*

                        irow1 = krow( ki ) + k - istart

                        irow2 = kp2row( ki ) + k - istart

                        CALL infog1l( itmp1, hbl, npcol, mycol, 0,

     $                                icol1, icol2 )

                        icol2 = numroc( i2, hbl, mycol, 0, npcol )

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.NE.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, down,

     $                                      mycol )

                              t2 = t1*v2

                              t3 = t1*v3

                              DO 190 j = icol1, icol2

                                 sum = a( ( j-1 )*lda+irow1 ) +

     $                                 v2*a( ( j-1 )*lda+irow1+1 ) +

     $                                 v3*work( irbuf+j-icol1+1 )

                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*

     $                              lda+irow1 ) - sum*t1

                                 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*

     $                              lda+irow1+1 ) - sum*t2

                                 work( irbuf+j-icol1+1 ) = work( irbuf+

     $                              j-icol1+1 ) - sum*t3

  190                         CONTINUE

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, down,

     $                                      mycol )

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.NE.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                              t2 = t1*v2

                              t3 = t1*v3

                              DO 200 j = icol1, icol2

                                 sum = work( irbuf+j-icol1+1 ) +

     $                                 v2*a( ( j-1 )*lda+irow1 ) +

     $                                 v3*a( ( j-1 )*lda+irow1+1 )

                                 work( irbuf+j-icol1+1 ) = work( irbuf+

     $                              j-icol1+1 ) - sum*t1

                                 a( ( j-1 )*lda+irow1 ) = a( ( j-1 )*

     $                              lda+irow1 ) - sum*t2

                                 a( ( j-1 )*lda+irow1+1 ) = a( ( j-1 )*

     $                              lda+irow1+1 ) - sum*t3

  200                         CONTINUE

                              CALL sgesd2d( contxt, 1, icol2-icol1+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                           END IF

                        END IF

                     END IF

  210             CONTINUE

               END IF

  220       CONTINUE

*

            DO 240 ki = 1, ibulge

               IF( krow( ki ).GT.kp2row( ki ) )

     $            GO TO 240

               IF( ( myrow.NE.icurrow( ki ) ) .AND.

     $             ( down.NE.icurrow( ki ) ) )GO TO 240

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( ( istart.EQ.istop ) .OR.

     $             ( mod( istart-1, hbl ).GE.hbl-2 ) .OR.

     $             ( icurrow( ki ).NE.myrow ) ) THEN

                  DO 230 k = istart, istop

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     nr = min( 3, i-k+1 )

                     IF( ( nr.EQ.3 ) .AND. ( krow( ki ).LE.

     $                   kp2row( ki ) ) ) THEN

                        IF( ( k.LT.istop ) .AND.

     $                      ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                           itmp1 = min( k2( ki )+1, i-1 ) + 1

                        ELSE

                           IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                              itmp1 = min( k2( ki )+1, i-1 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                              itmp1 = min( k+4, i2 ) + 1

                           END IF

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              itmp1 = min( k+3, i2 ) + 1

                           END IF

                        END IF

*

                        irow1 = krow( ki ) + k - istart

                        irow2 = kp2row( ki ) + k - istart

                        CALL infog1l( itmp1, hbl, npcol, mycol, 0,

     $                                icol1, icol2 )

                        icol2 = numroc( i2, hbl, mycol, 0, npcol )

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow2 ),

     $                                      lda, up, mycol )

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( nprow.GT.1 ) ) THEN

                           IF( irow1.EQ.irow2 ) THEN

                              CALL sgerv2d( contxt, 1, icol2-icol1+1,

     $                                      a( ( icol1-1 )*lda+irow1 ),

     $                                      lda, down, mycol )

                           END IF

                        END IF

                     END IF

  230             CONTINUE

               END IF

  240       CONTINUE

  250       CONTINUE

*

*        Now start major set of block COL reflections

*

            DO 260 ki = 1, ibulge

               IF( ( mycol.NE.icurcol( ki ) ) .AND.

     $             ( right.NE.icurcol( ki ) ) )GO TO 260

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

*

               IF( ( ( mod( istart-1, hbl ).LT.hbl-2 ) .OR. ( npcol.EQ.

     $             1 ) ) .AND. ( icurcol( ki ).EQ.mycol ) .AND.

     $             ( i-istop+1.GE.3 ) ) THEN

                  k = istart

                  IF( ( k.LT.istop ) .AND. ( mod( k-1,

     $                hbl ).LT.hbl-2 ) ) THEN

                     itmp1 = min( istart+1, i ) - 1

                  ELSE

                     IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                        itmp1 = min( k+3, i )

                     END IF

                     IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                        itmp1 = max( i1, k-1 ) - 1

                     END IF

                     IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                        itmp1 = max( i1, k-2 ) - 1

                     END IF

                  END IF

*

                  icol1 = kcol( ki )

                  CALL infog1l( i1, hbl, nprow, myrow, 0, irow1, irow2 )

                  irow2 = numroc( itmp1, hbl, myrow, 0, nprow )

                  IF( irow1.LE.irow2 ) THEN

                     itmp2 = irow2

                  ELSE

                     itmp2 = -1

                  END IF

                  CALL slaref( 'Col', a, lda, wantz, z, ldz, .true.,

     $                         icol1, icol1, istart, istop, irow1,

     $                         irow2, liloz, lihiz, work( vecsidx+1 ),

     $                         v2, v3, t1, t2, t3 )

                  k = istop

                  IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

*

*                 Do from ITMP1+1 to MIN(K+3,I)

*

                     IF( mod( k-1, hbl ).LT.hbl-3 ) THEN

                        irow1 = itmp2 + 1

                        IF( mod( ( itmp1 / hbl ), nprow ).EQ.myrow )

     $                       THEN

                           IF( itmp2.GT.0 ) THEN

                              irow2 = itmp2 + min( k+3, i ) - itmp1

                           ELSE

                              irow2 = irow1 - 1

                           END IF

                        ELSE

                           irow2 = irow1 - 1

                        END IF

                     ELSE

                        CALL infog1l( itmp1+1, hbl, nprow, myrow, 0,

     $                                irow1, irow2 )

                        irow2 = numroc( min( k+3, i ), hbl, myrow, 0,

     $                          nprow )

                     END IF

                     v2 = work( vecsidx+( k-1 )*3+1 )

                     v3 = work( vecsidx+( k-1 )*3+2 )

                     t1 = work( vecsidx+( k-1 )*3+3 )

                     t2 = t1*v2

                     t3 = t1*v3

                     icol1 = kcol( ki ) + istop - istart

                     CALL slaref( 'Col', a, lda, .false., z, ldz,

     $                            .false., icol1, icol1, istart, istop,

     $                            irow1, irow2, liloz, lihiz,

     $                            work( vecsidx+1 ), v2, v3, t1, t2,

     $                            t3 )

                  END IF

               END IF

  260       CONTINUE

*

            DO 320 ki = 1, ibulge

               IF( kcol( ki ).GT.kp2col( ki ) )

     $            GO TO 320

               IF( ( mycol.NE.icurcol( ki ) ) .AND.

     $             ( right.NE.icurcol( ki ) ) )GO TO 320

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN

*

*              INFO is found in a buffer

*

                  ispec = 1

               ELSE

*

*              All INFO is local

*

                  ispec = 0

               END IF

*

               DO 310 k = istart, istop

*

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  nr = min( 3, i-k+1 )

                  IF( ( nr.EQ.3 ) .AND. ( kcol( ki ).LE.kp2col( ki ) ) )

     $                 THEN

*

                     IF( ( k.LT.istop ) .AND.

     $                   ( mod( k-1, hbl ).LT.hbl-2 ) ) THEN

                        itmp1 = min( istart+1, i ) - 1

                     ELSE

                        IF( mod( k-1, hbl ).LT.hbl-2 ) THEN

                           itmp1 = min( k+3, i )

                        END IF

                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                           itmp1 = max( i1, k-1 ) - 1

                        END IF

                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                           itmp1 = max( i1, k-2 ) - 1

                        END IF

                     END IF

                     icol1 = kcol( ki ) + k - istart

                     icol2 = kp2col( ki ) + k - istart

                     CALL infog1l( i1, hbl, nprow, myrow, 0, irow1,

     $                             irow2 )

                     irow2 = numroc( itmp1, hbl, myrow, 0, nprow )

                     IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                   ( npcol.GT.1 ) ) THEN

                        IF( icol1.EQ.icol2 ) THEN

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, left )

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, left )

                        ELSE

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, right )

                           t2 = t1*v2

                           t3 = t1*v3

                           DO 270 j = irow1, irow2

                              sum = a( ( icol1-1 )*lda+j ) +

     $                              v2*a( icol1*lda+j ) +

     $                              v3*work( icbuf+j-irow1+1 )

                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*

     $                           lda+j ) - sum*t1

                              a( icol1*lda+j ) = a( icol1*lda+j ) -

     $                                           sum*t2

                              work( icbuf+j-irow1+1 ) = work( icbuf+j-

     $                           irow1+1 ) - sum*t3

  270                      CONTINUE

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, right )

                        END IF

                     END IF

                     IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                   ( npcol.GT.1 ) ) THEN

                        IF( icol1.EQ.icol2 ) THEN

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, right )

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   a( ( icol1-1 )*lda+irow1 ),

     $                                   lda, myrow, right )

                        ELSE

                           CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, left )

                           t2 = t1*v2

                           t3 = t1*v3

                           DO 280 j = irow1, irow2

                              sum = work( icbuf+j-irow1+1 ) +

     $                              v2*a( ( icol1-1 )*lda+j ) +

     $                              v3*a( icol1*lda+j )

                              work( icbuf+j-irow1+1 ) = work( icbuf+j-

     $                           irow1+1 ) - sum*t1

                              a( ( icol1-1 )*lda+j ) = a( ( icol1-1 )*

     $                           lda+j ) - sum*t2

                              a( icol1*lda+j ) = a( icol1*lda+j ) -

     $                                           sum*t3

  280                      CONTINUE

                           CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                   work( icbuf+1 ), irow2-irow1+1,

     $                                   myrow, left )

                        END IF

                     END IF

*

*                 If we want Z and we haven't already done any Z

                     IF( ( wantz ) .AND. ( mod( k-1,

     $                   hbl ).GE.hbl-2 ) .AND. ( npcol.GT.1 ) ) THEN

*

*                    Accumulate transformations in the matrix Z

*

                        irow1 = liloz

                        irow2 = lihiz

                        IF( mod( k-1, hbl ).EQ.hbl-2 ) THEN

                           IF( icol1.EQ.icol2 ) THEN

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, left )

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, left )

                           ELSE

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow,

     $                                      right )

                              t2 = t1*v2

                              t3 = t1*v3

                              icol1 = ( icol1-1 )*ldz

                              DO 290 j = irow1, irow2

                                 sum = z( icol1+j ) +

     $                                 v2*z( icol1+j+ldz ) +

     $                                 v3*work( icbuf+j-irow1+1 )

                                 z( j+icol1 ) = z( j+icol1 ) - sum*t1

                                 z( j+icol1+ldz ) = z( j+icol1+ldz ) -

     $                                              sum*t2

                                 work( icbuf+j-irow1+1 ) = work( icbuf+

     $                              j-irow1+1 ) - sum*t3

  290                         CONTINUE

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow,

     $                                      right )

                           END IF

                        END IF

                        IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                           IF( icol1.EQ.icol2 ) THEN

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, right )

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      z( ( icol1-1 )*ldz+irow1 ),

     $                                      ldz, myrow, right )

                           ELSE

                              CALL sgerv2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow, left )

                              t2 = t1*v2

                              t3 = t1*v3

                              icol1 = ( icol1-1 )*ldz

                              DO 300 j = irow1, irow2

                                 sum = work( icbuf+j-irow1+1 ) +

     $                                 v2*z( j+icol1 ) +

     $                                 v3*z( j+icol1+ldz )

                                 work( icbuf+j-irow1+1 ) = work( icbuf+

     $                              j-irow1+1 ) - sum*t1

                                 z( j+icol1 ) = z( j+icol1 ) - sum*t2

                                 z( j+icol1+ldz ) = z( j+icol1+ldz ) -

     $                                              sum*t3

  300                         CONTINUE

                              CALL sgesd2d( contxt, irow2-irow1+1, 1,

     $                                      work( icbuf+1 ),

     $                                      irow2-irow1+1, myrow, left )

                           END IF

                        END IF

                     END IF

                     IF( icurcol( ki ).EQ.mycol ) THEN

                        IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) ) THEN

                           localk2( ki ) = localk2( ki ) + 1

                        END IF

                     ELSE

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( icurcol( ki ).EQ.right ) ) THEN

                           IF( k.GT.m ) THEN

                              localk2( ki ) = localk2( ki ) + 2

                           ELSE

                              localk2( ki ) = localk2( ki ) + 1

                           END IF

                        END IF

                        IF( ( mod( k-1, hbl ).EQ.hbl-2 ) .AND.

     $                      ( i-k.EQ.2 ) .AND. ( icurcol( ki ).EQ.

     $                      right ) ) THEN

                           localk2( ki ) = localk2( ki ) + 2

                        END IF

                     END IF

                  END IF

  310          CONTINUE

  320       CONTINUE

*

*        Column work done

*

  330       CONTINUE

*

*        Now do NR=2 work

*

            DO 410 ki = 1, ibulge

               istart = max( k1( ki ), m )

               istop = min( k2( ki ), i-1 )

               IF( mod( istart-1, hbl ).GE.hbl-2 ) THEN

*

*              INFO is found in a buffer

*

                  ispec = 1

               ELSE

*

*              All INFO is local

*

                  ispec = 0

               END IF

*

               DO 400 k = istart, istop

*

                  v2 = work( vecsidx+( k-1 )*3+1 )

                  v3 = work( vecsidx+( k-1 )*3+2 )

                  t1 = work( vecsidx+( k-1 )*3+3 )

                  nr = min( 3, i-k+1 )

                  IF( nr.EQ.2 ) THEN

                     IF ( icurrow( ki ).EQ.myrow ) THEN

                        t2 = t1*v2

                     END IF

                     IF ( icurcol( ki ).EQ.mycol ) THEN

                        t2 = t1*v2

                     END IF

*

*              Apply G from the left to transform the rows of the matrix

*              in columns K to I2.

*

                     CALL infog1l( k, hbl, npcol, mycol, 0, liloh,

     $                             lihih )

                     lihih = numroc( i2, hbl, mycol, 0, npcol )

                     CALL infog1l( 1, hbl, nprow, myrow, 0, itmp2,

     $                             itmp1 )

                     itmp1 = numroc( k+1, hbl, myrow, 0, nprow )

                     IF( icurrow( ki ).EQ.myrow ) THEN

                        IF( ( ispec.EQ.0 ) .OR. ( nprow.EQ.1 ) .OR.

     $                      ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN

                           itmp1 = itmp1 - 1

                           DO 340 j = ( liloh-1 )*lda,

     $                             ( lihih-1 )*lda, lda

                              sum = a( itmp1+j ) + v2*a( itmp1+1+j )

                              a( itmp1+j ) = a( itmp1+j ) - sum*t1

                              a( itmp1+1+j ) = a( itmp1+1+j ) - sum*t2

  340                      CONTINUE

                        ELSE

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              CALL sgerv2d( contxt, 1, lihih-liloh+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                              DO 350 j = liloh, lihih

                                 sum = work( irbuf+j-liloh+1 ) +

     $                                 v2*a( ( j-1 )*lda+itmp1 )

                                 work( irbuf+j-liloh+1 ) = work( irbuf+

     $                              j-liloh+1 ) - sum*t1

                                 a( ( j-1 )*lda+itmp1 ) = a( ( j-1 )*

     $                              lda+itmp1 ) - sum*t2

  350                         CONTINUE

                              CALL sgesd2d( contxt, 1, lihih-liloh+1,

     $                                      work( irbuf+1 ), 1, up,

     $                                      mycol )

                           END IF

                        END IF

                     ELSE

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( icurrow( ki ).EQ.down ) ) THEN

                           CALL sgesd2d( contxt, 1, lihih-liloh+1,

     $                                   a( ( liloh-1 )*lda+itmp1 ),

     $                                   lda, down, mycol )

                           CALL sgerv2d( contxt, 1, lihih-liloh+1,

     $                                   a( ( liloh-1 )*lda+itmp1 ),

     $                                   lda, down, mycol )

                        END IF

                     END IF

*

*              Apply G from the right to transform the columns of the

*              matrix in rows I1 to MIN(K+3,I).

*

                     CALL infog1l( i1, hbl, nprow, myrow, 0, liloh,

     $                             lihih )

                     lihih = numroc( i, hbl, myrow, 0, nprow )

*

                     IF( icurcol( ki ).EQ.mycol ) THEN

*                 LOCAL A(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)

                        IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.

     $                      ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN

                           CALL infog1l( k, hbl, npcol, mycol, 0, itmp1,

     $                                   itmp2 )

                           itmp2 = numroc( k+1, hbl, mycol, 0, npcol )

                           DO 360 j = liloh, lihih

                              sum = a( ( itmp1-1 )*lda+j ) +

     $                              v2*a( itmp1*lda+j )

                              a( ( itmp1-1 )*lda+j ) = a( ( itmp1-1 )*

     $                           lda+j ) - sum*t1

                              a( itmp1*lda+j ) = a( itmp1*lda+j ) -

     $                                           sum*t2

  360                      CONTINUE

                        ELSE

                           itmp1 = localk2( ki )

                           IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                              CALL sgerv2d( contxt, lihih-liloh+1, 1,

     $                                      work( icbuf+1 ),

     $                                      lihih-liloh+1, myrow, left )

                              DO 370 j = liloh, lihih

                                 sum = work( icbuf+j ) +

     $                                 v2*a( ( itmp1-1 )*lda+j )

                                 work( icbuf+j ) = work( icbuf+j ) -

     $                                             sum*t1

                                 a( ( itmp1-1 )*lda+j )

     $                              = a( ( itmp1-1 )*lda+j ) - sum*t2

  370                         CONTINUE

                              CALL sgesd2d( contxt, lihih-liloh+1, 1,

     $                                      work( icbuf+1 ),

     $                                      lihih-liloh+1, myrow, left )

                           END IF

                        END IF

                     ELSE

                        IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                      ( icurcol( ki ).EQ.right ) ) THEN

                           itmp1 = kcol( ki )

                           CALL sgesd2d( contxt, lihih-liloh+1, 1,

     $                                   a( ( itmp1-1 )*lda+liloh ),

     $                                   lda, myrow, right )

                           CALL infog1l( k, hbl, npcol, mycol, 0, itmp1,

     $                                   itmp2 )

                           itmp2 = numroc( k+1, hbl, mycol, 0, npcol )

                           CALL sgerv2d( contxt, lihih-liloh+1, 1,

     $                                   a( ( itmp1-1 )*lda+liloh ),

     $                                   lda, myrow, right )

                        END IF

                     END IF

*

                     IF( wantz ) THEN

*

*                 Accumulate transformations in the matrix Z

*

                        IF( icurcol( ki ).EQ.mycol ) THEN

*                    LOCAL Z(LILOZ:LIHIZ,LOCALK2:LOCALK2+2)

                           IF( ( ispec.EQ.0 ) .OR. ( npcol.EQ.1 ) .OR.

     $                         ( mod( k-1, hbl ).EQ.hbl-2 ) ) THEN

                              itmp1 = kcol( ki ) + k - istart

                              itmp1 = ( itmp1-1 )*ldz

                              DO 380 j = liloz, lihiz

                                 sum = z( j+itmp1 ) +

     $                                 v2*z( j+itmp1+ldz )

                                 z( j+itmp1 ) = z( j+itmp1 ) - sum*t1

                                 z( j+itmp1+ldz ) = z( j+itmp1+ldz ) -

     $                                              sum*t2

  380                         CONTINUE

                              localk2( ki ) = localk2( ki ) + 1

                           ELSE

                              itmp1 = localk2( ki )

*                       IF WE ACTUALLY OWN COLUMN K

                              IF( mod( k-1, hbl ).EQ.hbl-1 ) THEN

                                 CALL sgerv2d( contxt, lihiz-liloz+1, 1,

     $                                         work( icbuf+1 ), ldz,

     $                                         myrow, left )

                                 itmp1 = ( itmp1-1 )*ldz

                                 DO 390 j = liloz, lihiz

                                    sum = work( icbuf+j ) +

     $                                    v2*z( j+itmp1 )

                                    work( icbuf+j ) = work( icbuf+j ) -

     $                                 sum*t1

                                    z( j+itmp1 ) = z( j+itmp1 ) - sum*t2

  390                            CONTINUE

                                 CALL sgesd2d( contxt, lihiz-liloz+1, 1,

     $                                         work( icbuf+1 ), ldz,

     $                                         myrow, left )

                                 localk2( ki ) = localk2( ki ) + 1

                              END IF

                           END IF

                        ELSE

*

*                    NO WORK BUT NEED TO UPDATE ANYWAY????

*

                           IF( ( mod( k-1, hbl ).EQ.hbl-1 ) .AND.

     $                         ( icurcol( ki ).EQ.right ) ) THEN

                              itmp1 = kcol( ki )

                              itmp1 = ( itmp1-1 )*ldz

                              CALL sgesd2d( contxt, lihiz-liloz+1, 1,

     $                                      z( liloz+itmp1 ), ldz,

     $                                      myrow, right )

                              CALL sgerv2d( contxt, lihiz-liloz+1, 1,

     $                                      z( liloz+itmp1 ), ldz,

     $                                      myrow, right )

                              localk2( ki ) = localk2( ki ) + 1

                           END IF

                        END IF

                     END IF

                  END IF

  400          CONTINUE

*

*        Adjust local information for this bulge

*

               IF( nprow.EQ.1 ) THEN

                  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1

                  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.

     $             ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )

     $              THEN

                  krow( ki ) = krow( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.

     $             ( icurrow( ki ).EQ.myrow ) .AND. ( nprow.GT.1 ) )

     $              THEN

                  kp2row( ki ) = kp2row( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.

     $             ( ( myrow.EQ.icurrow( ki ) ) .OR. ( down.EQ.

     $             icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN

                  CALL infog1l( k2( ki )+1, hbl, nprow, myrow, 0,

     $                          krow( ki ), itmp2 )

                  itmp2 = numroc( n, hbl, myrow, 0, nprow )

               END IF

               IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.

     $             ( ( myrow.EQ.icurrow( ki ) ) .OR. ( up.EQ.

     $             icurrow( ki ) ) ) .AND. ( nprow.GT.1 ) ) THEN

                  CALL infog1l( 1, hbl, nprow, myrow, 0, itmp2,

     $                          kp2row( ki ) )

                  kp2row( ki ) = numroc( k2( ki )+3, hbl, myrow, 0,

     $                           nprow )

               END IF

               IF( npcol.EQ.1 ) THEN

                  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1

                  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).LT.hbl-2 ) .AND.

     $             ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )

     $              THEN

                  kcol( ki ) = kcol( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k2( ki ), hbl ).LT.hbl-2 ) .AND.

     $             ( icurcol( ki ).EQ.mycol ) .AND. ( npcol.GT.1 ) )

     $              THEN

                  kp2col( ki ) = kp2col( ki ) + k2( ki ) - k1( ki ) + 1

               END IF

               IF( ( mod( k1( ki )-1, hbl ).GE.hbl-2 ) .AND.

     $             ( ( mycol.EQ.icurcol( ki ) ) .OR. ( right.EQ.

     $             icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN

                  CALL infog1l( k2( ki )+1, hbl, npcol, mycol, 0,

     $                          kcol( ki ), itmp2 )

                  itmp2 = numroc( n, hbl, mycol, 0, npcol )

               END IF

               IF( ( mod( k2( ki ), hbl ).GE.hbl-2 ) .AND.

     $             ( ( mycol.EQ.icurcol( ki ) ) .OR. ( left.EQ.

     $             icurcol( ki ) ) ) .AND. ( npcol.GT.1 ) ) THEN

                  CALL infog1l( 1, hbl, npcol, mycol, 0, itmp2,

     $                          kp2col( ki ) )

                  kp2col( ki ) = numroc( k2( ki )+3, hbl, mycol, 0,

     $                           npcol )

               END IF

               k1( ki ) = k2( ki ) + 1

               istop = min( k1( ki )+rotn-mod( k1( ki ), rotn ), i-2 )

               istop = min( istop, k1( ki )+hbl-3-

     $                 mod( k1( ki )-1, hbl ) )

               istop = min( istop, i2-2 )

               istop = max( istop, k1( ki ) )

*        ISTOP = MIN( ISTOP , I-1 )

               k2( ki ) = istop

               IF( k1( ki ).EQ.istop ) THEN

                  IF( ( mod( istop-1, hbl ).EQ.hbl-2 ) .AND.

     $                ( i-istop.GT.1 ) ) THEN

*

*              Next step switches rows & cols

*

                     icurrow( ki ) = mod( icurrow( ki )+1, nprow )

                     icurcol( ki ) = mod( icurcol( ki )+1, npcol )

                  END IF

               END IF

  410       CONTINUE

            IF( k2( ibulge ).LE.i-1 )

     $         GO TO 40

         END IF

*

  420 CONTINUE

*

*     Failure to converge in remaining number of iterations

*

      info = i

      RETURN

*

  430 CONTINUE

*

      IF( l.EQ.i ) THEN

*

*        H(I,I-1) is negligible: one eigenvalue has converged.

*

         CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow,

     $                 icol, itmp1, itmp2 )

         IF( ( myrow.EQ.itmp1 ) .AND. ( mycol.EQ.itmp2 ) ) THEN

            wr( i ) = a( ( icol-1 )*lda+irow )

         ELSE

            wr( i ) = zero

         END IF

         wi( i ) = zero

      ELSE IF( l.EQ.i-1 ) THEN

*

*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged.

*

         CALL pselget( 'All', ' ', h11, a, l, l, desca )

         CALL pselget( 'All', ' ', h21, a, i, l, desca )

         CALL pselget( 'All', ' ', h12, a, l, i, desca )

         CALL pselget( 'All', ' ', h22, a, i, i, desca )

         CALL slanv2( h11, h12, h21, h22, wr( l ), wi( l ), wr( i ),

     $                wi( i ), cs, sn )

         IF( node .NE. 0 ) THEN

            wr( l ) = zero

            wr( i ) = zero

            wi( l ) = zero

            wi( i ) = zero

         ENDIF

      ELSE

*

*        Find the eigenvalues in H(L:I,L:I), L < I-1

*

         jblk = i - l + 1

         IF( jblk.LE.2*iblk ) THEN

            CALL pslacp3( i-l+1, l, a, desca, s1, 2*iblk, 0, 0, 0 )

            CALL slahqr( .false., .false., jblk, 1, jblk, s1, 2*iblk,

     $                   wr( l ), wi( l ), 1, jblk, z, ldz, ierr )

            IF( node.NE.0 ) THEN

*

*           Erase the eigenvalues

*

               DO 440 k = l, i

                  wr( k ) = zero

                  wi( k ) = zero

  440          CONTINUE

            END IF

         END IF

      END IF

*

*     Decrement number of remaining iterations, and return to start of

*     the main loop with new value of I.

*

      itn = itn - its

      IF( m.EQ.l-10 ) THEN

         i = l - 1

      ELSE

         i = m

      END IF

*     I = L - 1

      GO TO 10

*

  450 CONTINUE

      CALL sgsum2d( contxt, 'All', ' ', n, 1, wr, n, -1, -1 )

      CALL sgsum2d( contxt, 'All', ' ', n, 1, wi, n, -1, -1 )

      RETURN

*

*     END OF PSLAHQR

*


      END

infog1l
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3

infog2l
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pselget
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2

pslabad
subroutine pslabad(ictxt, small, large)
Definition pslabad.f:2

pslaconsb
subroutine pslaconsb(a, desca, i, l, m, h44, h33, h43h34, buf, lwork)
Definition pslaconsb.f:3

pslacp3
subroutine pslacp3(m, i, a, desca, b, ldb, ii, jj, rev)
Definition pslacp3.f:2

pslahqr
subroutine pslahqr(wantt, wantz, n, ilo, ihi, a, desca, wr, wi, iloz, ihiz, z, descz, work, lwork, iwork, ilwork, info)
Definition pslahqr.f:4

pslasmsub
subroutine pslasmsub(a, desca, i, l, k, smlnum, buf, lwork)
Definition pslasmsub.f:2

pslawil
subroutine pslawil(ii, jj, m, a, desca, h44, h33, h43h34, v)
Definition pslawil.f:2

pxerbla
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2

slaref
subroutine slaref(type, a, lda, wantz, z, ldz, block, irow1, icol1, istart, istop, itmp1, itmp2, liloz, lihiz, vecs, v2, v3, t1, t2, t3)
Definition slaref.f:4

slasorte
subroutine slasorte(s, lds, j, out, info)
Definition slasorte.f:2