*  d2/de9/pstrord_8f_source.html

      SUBROUTINE pstrord( COMPQ, SELECT, PARA, N, T, IT, JT,

     $     DESCT, Q, IQ, JQ, DESCQ, WR, WI, M, WORK, LWORK,

     $     IWORK, LIWORK, INFO )

*

*     Contribution from the Department of Computing Science and HPC2N,

*     Umea University, Sweden

*

*  -- ScaLAPACK computational routine (version 2.0.2) --

*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver

*     May 1 2012

*

      IMPLICIT NONE

*

*     .. Scalar Arguments ..

      CHARACTER          COMPQ

      INTEGER            INFO, LIWORK, LWORK, M, N,

     $                   it, jt, iq, jq

*     ..

*     .. Array Arguments ..

      INTEGER            SELECT( * )

      INTEGER            PARA( 6 ), DESCT( * ), DESCQ( * ), IWORK( * )

      REAL               Q( * ), T( * ), WI( * ), WORK( * ), WR( * )

*     ..

*

*  Purpose

*  =======

*

*  PSTRORD reorders the real Schur factorization of a real matrix

*  A = Q*T*Q**T, so that a selected cluster of eigenvalues appears

*  in the leading diagonal blocks of the upper quasi-triangular matrix

*  T, and the leading columns of Q form an orthonormal basis of the

*  corresponding right invariant subspace.

*

*  T must be in Schur form (as returned by PSLAHQR), that is, block

*  upper triangular with 1-by-1 and 2-by-2 diagonal blocks.

*

*  This subroutine uses a delay and accumulate procedure for performing

*  the off-diagonal updates (see references for details).

*

*  Notes

*  =====

*

*  Each global data object is described by an associated description

*  vector.  This vector stores the information required to establish

*  the mapping between an object element and its corresponding process

*  and memory location.

*

*  Let A be a generic term for any 2D block cyclicly distributed array.

*  Such a global array has an associated description vector DESCA.

*  In the following comments, the character _ should be read as

*  "of the global array".

*

*  NOTATION        STORED IN      EXPLANATION

*  --------------- -------------- --------------------------------------

*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,

*                                 DTYPE_A = 1.

*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating

*                                 the BLACS process grid A is distribu-

*                                 ted over. The context itself is glo-

*                                 bal, but the handle (the integer

*                                 value) may vary.

*  M_A    (global) DESCA( M_ )    The number of rows in the global

*                                 array A.

*  N_A    (global) DESCA( N_ )    The number of columns in the global

*                                 array A.

*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute

*                                 the rows of the array.

*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute

*                                 the columns of the array.

*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first

*                                 row of the array A is distributed.

*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the

*                                 first column of the array A is

*                                 distributed.

*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local

*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).

*

*  Let K be the number of rows or columns of a distributed matrix,

*  and assume that its process grid has dimension p x q.

*  LOCr( K ) denotes the number of elements of K that a process

*  would receive if K were distributed over the p processes of its

*  process column.

*  Similarly, LOCc( K ) denotes the number of elements of K that a

*  process would receive if K were distributed over the q processes of

*  its process row.

*  The values of LOCr() and LOCc() may be determined via a call to the

*  ScaLAPACK tool function, NUMROC:

*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),

*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).

*  An upper bound for these quantities may be computed by:

*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A

*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A

*

*  Arguments

*  =========

*

*

*  COMPQ   (global input) CHARACTER*1

*          = 'V': update the matrix Q of Schur vectors;

*          = 'N': do not update Q.

*

*  SELECT  (global input/output) INTEGER array, dimension (N)

*          SELECT specifies the eigenvalues in the selected cluster. To

*          select a real eigenvalue w(j), SELECT(j) must be set to 1.

*          To select a complex conjugate pair of eigenvalues

*          w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,

*          either SELECT(j) or SELECT(j+1) or both must be set to 1;

*          a complex conjugate pair of eigenvalues must be

*          either both included in the cluster or both excluded.

*          On output, the (partial) reordering is displayed.

*

*  PARA    (global input) INTEGER array, dimension (6)

*          Block parameters (some should be replaced by calls to

*          PILAENV and others by meaningful default values):

*          PARA(1) = maximum number of concurrent computational windows

*                    allowed in the algorithm;

*                    0 < PARA(1) <= min(NPROW,NPCOL) must hold;

*          PARA(2) = number of eigenvalues in each window;

*                    0 < PARA(2) < PARA(3) must hold;

*          PARA(3) = window size; PARA(2) < PARA(3) <= DESCT(MB_)

*                    must hold;

*          PARA(4) = minimal percentage of flops required for

*                    performing matrix-matrix multiplications instead

*                    of pipelined orthogonal transformations;

*                    0 <= PARA(4) <= 100 must hold;

*          PARA(5) = width of block column slabs for row-wise

*                    application of pipelined orthogonal

*                    transformations in their factorized form;

*                    0 < PARA(5) <= DESCT(MB_) must hold.

*          PARA(6) = the maximum number of eigenvalues moved together

*                    over a process border; in practice, this will be

*                    approximately half of the cross border window size

*                    0 < PARA(6) <= PARA(2) must hold;

*

*  N       (global input) INTEGER

*          The order of the globally distributed matrix T. N >= 0.

*

*  T       (local input/output) REAL array,

*          dimension (LLD_T,LOCc(N)).

*          On entry, the local pieces of the global distributed

*          upper quasi-triangular matrix T, in Schur form. On exit, T is

*          overwritten by the local pieces of the reordered matrix T,

*          again in Schur form, with the selected eigenvalues in the

*          globally leading diagonal blocks.

*

*  IT      (global input) INTEGER

*  JT      (global input) INTEGER

*          The row and column index in the global array T indicating the

*          first column of sub( T ). IT = JT = 1 must hold.

*

*  DESCT   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the global distributed matrix T.

*

*  Q       (local input/output) REAL array,

*          dimension (LLD_Q,LOCc(N)).

*          On entry, if COMPQ = 'V', the local pieces of the global

*          distributed matrix Q of Schur vectors.

*          On exit, if COMPQ = 'V', Q has been postmultiplied by the

*          global orthogonal transformation matrix which reorders T; the

*          leading M columns of Q form an orthonormal basis for the

*          specified invariant subspace.

*          If COMPQ = 'N', Q is not referenced.

*

*  IQ      (global input) INTEGER

*  JQ      (global input) INTEGER

*          The row and column index in the global array Q indicating the

*          first column of sub( Q ). IQ = JQ = 1 must hold.

*

*  DESCQ   (global and local input) INTEGER array of dimension DLEN_.

*          The array descriptor for the global distributed matrix Q.

*

*  WR      (global output) REAL array, dimension (N)

*  WI      (global output) REAL array, dimension (N)

*          The real and imaginary parts, respectively, of the reordered

*          eigenvalues of T. The eigenvalues are in principle stored in

*          the same order as on the diagonal of T, with WR(i) = T(i,i)

*          and, if T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0

*          and WI(i+1) = -WI(i).

*          Note also that if a complex eigenvalue is sufficiently

*          ill-conditioned, then its value may differ significantly

*          from its value before reordering.

*

*  M       (global output) INTEGER

*          The dimension of the specified invariant subspace.

*          0 <= M <= N.

*

*  WORK    (local workspace/output) REAL array,

*          dimension (LWORK)

*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

*

*  LWORK   (local input) INTEGER

*          The dimension of the array WORK.

*

*          If LWORK = -1, then a workspace query is assumed; the routine

*          only calculates the optimal size of the WORK array, returns

*          this value as the first entry of the WORK array, and no error

*          message related to LWORK is issued by PXERBLA.

*

*  IWORK   (local workspace/output) INTEGER array, dimension (LIWORK)

*

*  LIWORK  (local input) INTEGER

*          The dimension of the array IWORK.

*

*          If LIWORK = -1, then a workspace query is assumed; the

*          routine only calculates the optimal size of the IWORK array,

*          returns this value as the first entry of the IWORK array, and

*          no error message related to LIWORK is issued by PXERBLA.

*

*  INFO    (global output) INTEGER

*          = 0: successful exit

*          < 0: if INFO = -i, the i-th argument had an illegal value.

*          If the i-th argument is an array and the j-entry had

*          an illegal value, then INFO = -(i*1000+j), if the i-th

*          argument is a scalar and had an illegal value, then INFO = -i.

*          > 0: here we have several possibilities

*            *) Reordering of T failed because some eigenvalues are too

*               close to separate (the problem is very ill-conditioned);

*               T may have been partially reordered, and WR and WI

*               contain the eigenvalues in the same order as in T.

*               On exit, INFO = {the index of T where the swap failed}.

*            *) A 2-by-2 block to be reordered split into two 1-by-1

*               blocks and the second block failed to swap with an

*               adjacent block.

*               On exit, INFO = {the index of T where the swap failed}.

*            *) If INFO = N+1, there is no valid BLACS context (see the

*               BLACS documentation for details).

*          In a future release this subroutine may distinguish between

*          cases 1 and 2 above.

*

*  Additional requirements

*  =======================

*

*  The following alignment requirements must hold:

*  (a) DESCT( MB_ ) = DESCT( NB_ ) = DESCQ( MB_ ) = DESCQ( NB_ )

*  (b) DESCT( RSRC_ ) = DESCQ( RSRC_ )

*  (c) DESCT( CSRC_ ) = DESCQ( CSRC_ )

*

*  All matrices must be blocked by a block factor larger than or

*  equal to three (3). This is to simplify reordering across processor

*  borders in the presence of 2-by-2 blocks.

*

*  Limitations

*  ===========

*

*  This algorithm cannot work on submatrices of T and Q, i.e.,

*  IT = JT = IQ = JQ = 1 must hold. This is however no limitation

*  since PSLAHQR does not compute Schur forms of submatrices anyway.

*

*  References

*  ==========

*

*  [1] Z. Bai and J. W. Demmel; On swapping diagonal blocks in real

*      Schur form, Linear Algebra Appl., 186:73--95, 1993. Also as

*      LAPACK Working Note 54.

*

*  [2] D. Kressner; Block algorithms for reordering standard and

*      generalized Schur forms, ACM TOMS, 32(4):521-532, 2006.

*      Also LAPACK Working Note 171.

*

*  [3] R. Granat, B. Kagstrom, and D. Kressner; Parallel eigenvalue

*      reordering in real Schur form, Concurrency and Computations:

*      Practice and Experience, 21(9):1225-1250, 2009. Also as

*      LAPACK Working Note 192.

*

*  Parallel execution recommendations

*  ==================================

*

*  Use a square grid, if possible, for maximum performance. The block

*  parameters in PARA should be kept well below the data distribution

*  block size. In particular, see [3] for recommended settings for

*  these parameters.

*

*  In general, the parallel algorithm strives to perform as much work

*  as possible without crossing the block borders on the main block

*  diagonal.

*

*  Contributors

*  ============

*

*  Implemented by Robert Granat, Dept. of Computing Science and HPC2N,

*  Umea University, Sweden, March 2007,

*  in collaboration with Bo Kagstrom and Daniel Kressner.

*  Modified by Meiyue Shao, October 2011.

*

*  Revisions

*  =========

*

*  Please send bug-reports to granat@cs.umu.se

*

*  Keywords

*  ========

*

*  Real Schur form, eigenvalue reordering

*

*  =====================================================================

*     ..

*     .. Parameters ..

      CHARACTER          TOP

      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,

     $                   lld_, mb_, m_, nb_, n_, rsrc_

      REAL               ZERO, ONE

      PARAMETER          ( TOP = '1-Tree',

     $                     block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,

     $                     ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,

     $                     rsrc_ = 7, csrc_ = 8, lld_ = 9,

     $                     zero = 0.0, one = 1.0 )

*     ..

*     .. Local Scalars ..

      LOGICAL            LQUERY, PAIR, SWAP, WANTQ,

     $                   ISHH, FIRST, SKIP1CR, BORDER, LASTWAIT

      INTEGER            NPROW, NPCOL, MYROW, MYCOL, NB, NPROCS,

     $                   IERR, DIM1, INDX, LLDT, TRSRC, TCSRC, ILOC1,

     $                   jloc1, myierr, ictxt,

     $                   rsrc1, csrc1, iloc3, jloc3, trsrc3,

     $                   tcsrc3, iloc, jloc, trsrc4, tcsrc4,

     $                   flops, i, ilo, ihi, j, k, kk, kks,

     $                   ks, liwmin, lwmin, mmult, n1, n2,

     $                   ncb, ndtraf, nitraf, nwin, numwin, pdtraf,

     $                   pitraf, pdw, wineig, winsiz, lldq,

     $                   rsrc, csrc, ililo, ilihi, ilsel, irsrc,

     $                   icsrc, ipiw, ipw1, ipw2, ipw3, tihi, tilo,

     $                   lihi, window, lilo, lsel, buffer,

     $                   nmwin2, bufflen, lrows, lcols, iloc2, jloc2,

     $                   wneicr, window0, rsrc4, csrc4, lihi4, rsrc3,

     $                   csrc3, rsrc2, csrc2, lihic, lihi1, ilen4,

     $                   seli4, ilen1, dim4, ipw4, qrows, trows,

     $                   tcols, ipw5, ipw6, ipw7, ipw8, jloc4,

     $                   east, west, iloc4, south, north, indxs,

     $                   itt, jtt, ilen, dlen, indxe, trsrc1, tcsrc1,

     $                   trsrc2, tcsrc2, ilos, dir, tlihi, tlilo, tlsel,

     $                   round, last, win0s, win0e, wine

      REAL               ELEM, ELEM1, ELEM2, ELEM3, ELEM4, SN, CS, TMP,

     $                   ELEM5

*     ..

*     .. Local Arrays ..

      INTEGER            IBUFF( 8 ), IDUM1( 1 ), IDUM2( 1 ), MMAX( 1 ),

     $                    MMIN( 1 )

*     ..

*     .. External Functions ..

      LOGICAL            LSAME

      INTEGER            NUMROC, INDXG2P, INDXG2L

      EXTERNAL           lsame, numroc, indxg2p, indxg2l

*     ..

*     .. External Subroutines ..

      EXTERNAL           pslacpy, pxerbla, pchk1mat, pchk2mat,

     $                   sgemm, slamov, ilacpy, chk1mat,

     $                   infog2l, dgsum2d, sgesd2d, sgerv2d, sgebs2d,

     $                   sgebr2d, igsum2d, blacs_gridinfo, igebs2d,

     $                   igebr2d, igamx2d, igamn2d, bslaapp, bdtrexc

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC          abs, max, sqrt, min

*     ..

*     .. Local Functions ..

      INTEGER            ICEIL

*     ..

*     .. Executable Statements ..

*

*     Get grid parameters.

*

      ictxt = desct( ctxt_ )

      CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )

      nprocs = nprow*npcol

*

*     Test if grid is O.K., i.e., the context is valid.

*

      info = 0

      IF( nprow.EQ.-1 ) THEN

         info = n+1

      END IF

*

*     Check if workspace query.

*

      lquery = lwork.EQ.-1 .OR. liwork.EQ.-1

*

*     Test dimensions for local sanity.

*

      IF( info.EQ.0 ) THEN

         CALL chk1mat( n, 5, n, 5, it, jt, desct, 9, info )

      END IF

      IF( info.EQ.0 ) THEN

         CALL chk1mat( n, 5, n, 5, iq, jq, descq, 13, info )

      END IF

*

*     Check the blocking sizes for alignment requirements.

*

      IF( info.EQ.0 ) THEN

         IF( desct( mb_ ).NE.desct( nb_ ) ) info = -(1000*9 + mb_)

      END IF

      IF( info.EQ.0 ) THEN

         IF( descq( mb_ ).NE.descq( nb_ ) ) info = -(1000*13 + mb_)

      END IF

      IF( info.EQ.0 ) THEN

         IF( desct( mb_ ).NE.descq( mb_ ) ) info = -(1000*9 + mb_)

      END IF

*

*     Check the blocking sizes for minimum sizes.

*

      IF( info.EQ.0 ) THEN

         IF( n.NE.desct( mb_ ) .AND. desct( mb_ ).LT.3 )

     $      info = -(1000*9 + mb_)

         IF( n.NE.descq( mb_ ) .AND. descq( mb_ ).LT.3 )

     $      info = -(1000*13 + mb_)

      END IF

*

*     Check parameters in PARA.

*

      nb = desct( mb_ )

      IF( info.EQ.0 ) THEN

         IF( para(1).LT.1 .OR. para(1).GT.min(nprow,npcol) )

     $      info = -(1000 * 4 + 1)

         IF( para(2).LT.1 .OR. para(2).GE.para(3) )

     $      info = -(1000 * 4 + 2)

         IF( para(3).LT.1 .OR. para(3).GT.nb )

     $      info = -(1000 * 4 + 3)

         IF( para(4).LT.0 .OR. para(4).GT.100 )

     $      info = -(1000 * 4 + 4)

         IF( para(5).LT.1 .OR. para(5).GT.nb )

     $      info = -(1000 * 4 + 5)

         IF( para(6).LT.1 .OR. para(6).GT.para(2) )

     $      info = -(1000 * 4 + 6)

      END IF

*

*     Check requirements on IT, JT, IQ and JQ.

*

      IF( info.EQ.0 ) THEN

         IF( it.NE.1 ) info = -6

         IF( jt.NE.it ) info = -7

         IF( iq.NE.1 ) info = -10

         IF( jq.NE.iq ) info = -11

      END IF

*

*     Test input parameters for global sanity.

*

      IF( info.EQ.0 ) THEN

         CALL pchk1mat( n, 5, n, 5, it, jt, desct, 9, 0, idum1,

     $        idum2, info )

      END IF

      IF( info.EQ.0 ) THEN

         CALL pchk1mat( n, 5, n, 5, iq, jq, descq, 13, 0, idum1,

     $        idum2, info )

      END IF

      IF( info.EQ.0 ) THEN

         CALL pchk2mat( n, 5, n, 5, it, jt, desct, 9, n, 5, n, 5,

     $        iq, jq, descq, 13, 0, idum1, idum2, info )

      END IF

*

*     Decode and test the input parameters.

*

      IF( info.EQ.0 .OR. lquery ) THEN

*

         wantq = lsame( compq, 'V' )

         IF( n.LT.0 ) THEN

            info = -4

         ELSE

*

*           Extract local leading dimension.

*

            lldt = desct( lld_ )

            lldq = descq( lld_ )

*

*           Check the SELECT vector for consistency and set M to the

*           dimension of the specified invariant subspace.

*

            m = 0

            DO 10 k = 1, n

               IF( k.LT.n ) THEN

                  CALL infog2l( k+1, k, desct, nprow, npcol,

     $                 myrow, mycol, itt, jtt, trsrc, tcsrc )

                  IF( myrow.EQ.trsrc .AND. mycol.EQ.tcsrc ) THEN

                     elem = t( (jtt-1)*lldt + itt )

                     IF( elem.NE.zero ) THEN

                        IF( SELECT(k).NE.0 .AND.

     $                       SELECT(k+1).EQ.0 ) THEN

*                           INFO = -2

                           SELECT(k+1) = 1

                        ELSEIF( SELECT(k).EQ.0 .AND.

     $                          SELECT(k+1).NE.0 ) THEN

*                           INFO = -2

                           SELECT(k) = 1

                        END IF

                     END IF

                  END IF

               END IF

               IF( SELECT(k).NE.0 ) m = m + 1

 10         CONTINUE

            mmax( 1 ) = m

            mmin( 1 ) = m

            IF( nprocs.GT.1 )

     $         CALL igamx2d( ictxt, 'All', top, 1, 1, mmax( 1 ), 1, -1,

     $              -1, -1, -1, -1 )

            IF( nprocs.GT.1 )

     $         CALL igamn2d( ictxt, 'All', top, 1, 1, mmin( 1 ), 1, -1,

     $              -1, -1, -1, -1 )

            IF( mmax( 1 ).GT.mmin( 1 ) ) THEN

               m = mmax( 1 )

               IF( nprocs.GT.1 )

     $            CALL igamx2d( ictxt, 'All', top, n, 1, SELECT, n,

     $                 -1, -1, -1, -1, -1 )

            END IF

*

*           Compute needed workspace.

*

            n1 = m

            n2 = n - m

*

            trows = numroc( n, nb, myrow, desct(rsrc_), nprow )

            tcols = numroc( n, nb, mycol, desct(csrc_), npcol )

            lwmin = n + 7*nb**2 + 2*trows*para( 3 ) + tcols*para( 3 ) +

     $           max( trows*para( 3 ), tcols*para( 3 ) )

            liwmin = 5*para( 1 ) + para( 2 )*para( 3 ) -

     $           para( 2 ) * ( para( 2 ) + 1 ) / 2

*

            IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN

               info = -17

            ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN

               info = -19

            END IF

         END IF

      END IF

*

*     Global maximum on info.

*

      IF( nprocs.GT.1 ) THEN

            CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, -1, -1,

     $        -1, -1, -1 )

      END IF

*

*     Return if some argument is incorrect.

*

      IF( info.NE.0 .AND. .NOT.lquery ) THEN

         m = 0

         CALL pxerbla( ictxt, 'PSTRORD', -info )

         RETURN

      ELSEIF( lquery ) THEN

         work( 1 ) = float(lwmin)

         iwork( 1 ) = liwmin

         RETURN

      END IF

*

*     Quick return if possible.

*

      IF( m.EQ.n .OR. m.EQ.0 ) GO TO 545

*

*     Set parameters.

*

      numwin = para( 1 )

      wineig = max( para( 2 ), 2 )

      winsiz = min( max( para( 3 ), para( 2 )*2 ), nb )

      mmult  = para( 4 )

      ncb    = para( 5 )

      wneicr = para( 6 )

*

*     Insert some pointers into INTEGER workspace.

*

*     Information about all the active windows is stored

*     in IWORK( 1:5*NUMWIN ). Each processor has a copy.

*       LILO: start position

*       LIHI: stop position

*       LSEL: number of selected eigenvalues

*       RSRC: processor id (row)

*       CSRC: processor id (col)

*     IWORK( IPIW+ ) contain information of orthogonal transformations.

*

      ililo = 1

      ilihi = ililo + numwin

      ilsel = ilihi + numwin

      irsrc = ilsel + numwin

      icsrc = irsrc + numwin

      ipiw  = icsrc + numwin

*

*     Insert some pointers into REAL workspace - for now we

*     only need two pointers.

*

      ipw1 = 1

      ipw2 = ipw1 + nb

*

*     Collect the selected blocks at the top-left corner of T.

*

*     Globally: ignore eigenvalues that are already in order.

*     ILO is a global variable and is kept updated to be consistent

*     throughout the process mesh.

*

      ilo = 0

 40   CONTINUE

      ilo = ilo + 1

      IF( ilo.LE.n ) THEN

         IF( SELECT(ilo).NE.0 ) GO TO 40

      END IF

*

*     Globally: start the collection at the top of the matrix. Here,

*     IHI is a global variable and is kept updated to be consistent

*     throughout the process mesh.

*

      ihi = n

*

*     Globally:  While ( ILO <= M ) do

 50   CONTINUE

*

      IF( ilo.LE.m ) THEN

*

*        Depending on the value of ILO, find the diagonal block index J,

*        such that T(1+(J-1)*NB:1+J*NB,1+(J-1)*NB:1+J*NB) contains the

*        first unsorted eigenvalue. Check that J does not point to a

*        block with only one selected eigenvalue in the last position

*        which belongs to a split 2-by-2 block.

*

         ilos = ilo - 1

 52      CONTINUE

         ilos = ilos + 1

         IF( SELECT(ilos).EQ.0 ) GO TO 52

         IF( ilos.LT.n ) THEN

            IF( SELECT(ilos+1).NE.0 .AND. mod(ilos,nb).EQ.0 ) THEN

               CALL pselget( 'All', top, elem, t, ilos+1, ilos, desct )

               IF( elem.NE.zero ) GO TO 52

            END IF

         END IF

         j = iceil(ilos,nb)

*

*        Globally: Set start values of LILO and LIHI for all processes.

*        Choose also the number of selected eigenvalues at top of each

*        diagonal block such that the number of eigenvalues which remain

*        to be reordered is an integer multiple of WINEIG.

*

*        All the information is saved into the INTEGER workspace such

*        that all processors are aware of each others operations.

*

*        Compute the number of concurrent windows.

*

         nmwin2 = (iceil(ihi,nb)*nb - (ilo-mod(ilo,nb)+1)+1) / nb

         nmwin2 = min( min( numwin, nmwin2 ), iceil(n,nb) - j + 1 )

*

*        For all windows, set LSEL = 0 and find a proper start value of

*        LILO such that LILO points at the first non-selected entry in

*        the corresponding diagonal block of T.

*

         DO 80 k = 1, nmwin2

            iwork( ilsel+k-1) = 0

            iwork( ililo+k-1) = max( ilo, (j-1)*nb+(k-1)*nb+1 )

            lilo = iwork( ililo+k-1 )

 82         CONTINUE

            IF( SELECT(lilo).NE.0 .AND. lilo.LT.(j+k-1)*nb ) THEN

               lilo = lilo + 1

               IF( lilo.LE.n ) GO TO 82

            END IF

            iwork( ililo+k-1 ) = lilo

*

*           Fix each LILO to ensure that no 2-by-2 block is cut in top

*           of the submatrix (LILO:LIHI,LILO:LIHI).

*

            lilo = iwork(ililo+k-1)

            IF( lilo.GT.nb ) THEN

               CALL pselget( 'All', top, elem, t, lilo, lilo-1, desct )

               IF( elem.NE.zero ) THEN

                  IF( lilo.LT.(j+k-1)*nb ) THEN

                     iwork(ililo+k-1) = iwork(ililo+k-1) + 1

                  ELSE

                     iwork(ililo+k-1) = iwork(ililo+k-1) - 1

                  END IF

               END IF

            END IF

*

*           Set a proper LIHI value for each window. Also find the

*           processors corresponding to the corresponding windows.

*

            iwork( ilihi+k-1 ) =  iwork( ililo+k-1 )

            iwork( irsrc+k-1 ) = indxg2p( iwork(ililo+k-1), nb, myrow,

     $           desct( rsrc_ ), nprow )

            iwork( icsrc+k-1 ) = indxg2p( iwork(ililo+k-1), nb, mycol,

     $           desct( csrc_ ), npcol )

            tilo = iwork(ililo+k-1)

            tihi = min( n, iceil( tilo, nb ) * nb )

            DO 90 kk = tihi, tilo, -1

               IF( SELECT(kk).NE.0 ) THEN

                  iwork(ilihi+k-1) = max(iwork(ilihi+k-1) , kk )

                  iwork(ilsel+k-1) = iwork(ilsel+k-1) + 1

                  IF( iwork(ilsel+k-1).GT.wineig ) THEN

                     iwork(ilihi+k-1) = kk

                     iwork(ilsel+k-1) = 1

                  END IF

               END IF

 90         CONTINUE

*

*           Fix each LIHI to avoid that bottom of window cuts 2-by-2

*           block. We exclude such a block if located on block (process)

*           border and on window border or if an inclusion would cause

*           violation on the maximum number of eigenvalues to reorder

*           inside each window. If only on window border, we include it.

*           The excluded block is included automatically later when a

*           subcluster is reordered into the block from South-East.

*

            lihi = iwork(ilihi+k-1)

            IF( lihi.LT.n ) THEN

               CALL pselget( 'All', top, elem, t, lihi+1, lihi, desct )

               IF( elem.NE.zero ) THEN

                  IF( iceil( lihi, nb ) .NE. iceil( lihi+1, nb ) .OR.

     $                 iwork( ilsel+k-1 ).EQ.wineig ) THEN

                     iwork( ilihi+k-1 ) = iwork( ilihi+k-1 ) - 1

                     IF( iwork( ilsel+k-1 ).GT.2 )

     $                  iwork( ilsel+k-1 ) = iwork( ilsel+k-1 ) - 1

                  ELSE

                     iwork( ilihi+k-1 ) = iwork( ilihi+k-1 ) + 1

                     IF( SELECT(lihi+1).NE.0 )

     $                  iwork( ilsel+k-1 ) = iwork( ilsel+k-1 ) + 1

                  END IF

               END IF

            END IF

 80      CONTINUE

*

*        Fix the special cases of LSEL = 0 and LILO = LIHI for each

*        window by assuring that the stop-condition for local reordering

*        is fulfilled directly. Do this by setting LIHI = startposition

*        for the corresponding block and LILO = LIHI + 1.

*

         DO 85 k = 1, nmwin2

            lilo = iwork( ililo + k - 1 )

            lihi = iwork( ilihi + k - 1 )

            lsel = iwork( ilsel + k - 1 )

            IF( lsel.EQ.0 .OR. lilo.EQ.lihi ) THEN

               lihi = iwork( ilihi + k - 1 )

               iwork( ilihi + k - 1 ) = (iceil(lihi,nb)-1)*nb + 1

               iwork( ililo + k - 1 ) = iwork( ilihi + k - 1 ) + 1

            END IF

 85      CONTINUE

*

*        Associate all processors with the first computational window

*        that should be activated, if possible.

*

         lilo = ihi

         lihi = ilo

         lsel = m

         first = .true.

         DO 95 window = 1, nmwin2

            rsrc = iwork(irsrc+window-1)

            csrc = iwork(icsrc+window-1)

            IF( myrow.EQ.rsrc .OR. mycol.EQ.csrc ) THEN

               tlilo = iwork( ililo + window - 1 )

               tlihi = iwork( ilihi + window - 1 )

               tlsel = iwork( ilsel + window - 1 )

               IF( (.NOT. ( lihi .GE. lilo + lsel ) ) .AND.

     $              ( (tlihi .GE. tlilo + tlsel) .OR. first ) ) THEN

                  IF( first ) first = .false.

                  lilo = tlilo

                  lihi = tlihi

                  lsel = tlsel

                  GO TO 97

               END IF

            END IF

 95      CONTINUE

 97      CONTINUE

*

*        Exclude all processors that are not involved in any

*        computational window right now.

*

         ierr = 0

         IF( lilo.EQ.ihi .AND. lihi.EQ.ilo .AND. lsel.EQ.m )

     $      GO TO 114

*

*        Make sure all processors associated with a computational window

*        enter the local reordering the first time.

*

         first = .true.

*

*        Globally for all computational windows:

*        While ( LIHI >= LILO + LSEL ) do

         round = 1

 130     CONTINUE

         IF( first .OR. ( lihi .GE. lilo + lsel ) ) THEN

*

*           Perform computations in parallel: loop through all

*           computational windows, do local reordering and accumulate

*           transformations, broadcast them in the corresponding block

*           row and columns and compute the corresponding updates.

*

            DO 110 window = 1, nmwin2

               rsrc = iwork(irsrc+window-1)

               csrc = iwork(icsrc+window-1)

*

*              The process on the block diagonal computes the

*              reordering.

*

               IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN

                  lilo = iwork(ililo+window-1)

                  lihi = iwork(ilihi+window-1)

                  lsel = iwork(ilsel+window-1)

*

*                 Compute the local value of I -- start position.

*

                  i = max( lilo, lihi - winsiz + 1 )

*

*                 Fix my I to avoid that top of window cuts a 2-by-2

*                 block.

*

                  IF( i.GT.lilo ) THEN

                     CALL infog2l( i, i-1, desct, nprow, npcol, myrow,

     $                    mycol, iloc, jloc, rsrc, csrc )

                     IF( t( lldt*(jloc-1) + iloc ).NE.zero )

     $                  i = i + 1

                  END IF

*

*                 Compute local indices for submatrix to operate on.

*

                  CALL infog2l( i, i, desct, nprow, npcol,

     $                 myrow, mycol, iloc1, jloc1, rsrc, csrc )

*

*                 The active window is ( I:LIHI, I:LIHI ). Reorder

*                 eigenvalues within this window and pipeline

*                 transformations.

*

                  nwin = lihi - i + 1

                  ks = 0

                  pitraf = ipiw

                  pdtraf = ipw2

*

                  pair = .false.

                  DO 140 k = i, lihi

                     IF( pair ) THEN

                        pair = .false.

                     ELSE

                        swap = SELECT( k ).NE.0

                        IF( k.LT.lihi ) THEN

                           CALL infog2l( k+1, k, desct, nprow, npcol,

     $                          myrow, mycol, iloc, jloc, rsrc, csrc )

                           IF( t( lldt*(jloc-1) + iloc ).NE.zero )

     $                        pair = .true.

                        END IF

                        IF( swap ) THEN

                           ks = ks + 1

*

*                       Swap the K-th block to position I+KS-1.

*

                           ierr = 0

                           kk  = k - i + 1

                           kks = ks

                           IF( kk.NE.ks ) THEN

                              nitraf = liwork - pitraf + 1

                              ndtraf = lwork - pdtraf + 1

                              CALL bstrexc( nwin,

     $                             t(lldt*(jloc1-1) + iloc1), lldt, kk,

     $                             kks, nitraf, iwork( pitraf ), ndtraf,

     $                             work( pdtraf ), work(ipw1), ierr )

                              pitraf = pitraf + nitraf

                              pdtraf = pdtraf + ndtraf

*

*                             Update array SELECT.

*

                              IF ( pair ) THEN

                                 DO 150 j = i+kk-1, i+kks, -1

                                    SELECT(j+1) = SELECT(j-1)

 150                             CONTINUE

                                 SELECT(i+kks-1) = 1

                                 SELECT(i+kks) = 1

                              ELSE

                                 DO 160 j = i+kk-1, i+kks, -1

                                    SELECT(j) = SELECT(j-1)

 160                             CONTINUE

                                 SELECT(i+kks-1) = 1

                              END IF

*

                              IF ( ierr.EQ.1 .OR. ierr.EQ.2 ) THEN

*

*                                Some blocks are too close to swap:

*                                prepare to leave in a clean fashion. If

*                                IERR.EQ.2, we must update SELECT to

*                                account for the fact that the 2 by 2

*                                block to be reordered did split and the

*                                first part of this block is already

*                                reordered.

*

                                 IF ( ierr.EQ.2 ) THEN

                                    SELECT( i+kks-3 ) = 1

                                    SELECT( i+kks-1 ) = 0

                                    kks = kks + 1

                                 END IF

*

*                                Update off-diagonal blocks immediately.

*

                                 GO TO 170

                              END IF

                              ks = kks

                           END IF

                           IF( pair )

     $                        ks = ks + 1

                        END IF

                     END IF

 140              CONTINUE

               END IF

 110        CONTINUE

 170        CONTINUE

*

*           The on-diagonal processes save their information from the

*           local reordering in the integer buffer. This buffer is

*           broadcast to updating processors, see below.

*

            DO 175 window = 1, nmwin2

               rsrc = iwork(irsrc+window-1)

               csrc = iwork(icsrc+window-1)

               IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN

                  ibuff( 1 ) = i

                  ibuff( 2 ) = nwin

                  ibuff( 3 ) = pitraf

                  ibuff( 4 ) = ks

                  ibuff( 5 ) = pdtraf

                  ibuff( 6 ) = ndtraf

                  ilen = pitraf - ipiw

                  dlen = pdtraf - ipw2

                  ibuff( 7 ) = ilen

                  ibuff( 8 ) = dlen

               END IF

 175        CONTINUE

*

*           For the updates with respect to the local reordering, we

*           organize the updates in two phases where the update

*           "direction" (controlled by the DIR variable below) is first

*           chosen to be the corresponding rows, then the corresponding

*           columns.

*

            DO 1111 dir = 1, 2

*

*           Broadcast information about the reordering and the

*           accumulated transformations: I, NWIN, PITRAF, KS, PDTRAF,

*           NDTRAF, ILEN, DLEN. If no broadcast is performed, use an

*           artificial value of KS to prevent updating indices for

*           windows already finished (use KS = -1).

*

            DO 111 window = 1, nmwin2

               rsrc = iwork(irsrc+window-1)

               csrc = iwork(icsrc+window-1)

               IF( myrow.EQ.rsrc .OR. mycol.EQ.csrc ) THEN

                  lilo = iwork(ililo+window-1)

                  lihi = iwork(ilihi+window-1)

                  lsel = iwork(ilsel+window-1)

               END IF

               IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN

                  IF( npcol.GT.1 .AND. dir.EQ.1 )

     $               CALL igebs2d( ictxt, 'Row', top, 8, 1, ibuff, 8 )

                  IF( nprow.GT.1 .AND. dir.EQ.2 )

     $                 CALL igebs2d( ictxt, 'Col', top, 8, 1, ibuff, 8 )

               ELSEIF( myrow.EQ.rsrc .OR. mycol.EQ.csrc ) THEN

                  IF( npcol.GT.1 .AND. dir.EQ.1 .AND. myrow.EQ.rsrc )

     $                 THEN

                     IF( first .OR. (lihi .GE. lilo + lsel) ) THEN

                        CALL igebr2d( ictxt, 'Row', top, 8, 1, ibuff, 8,

     $                       rsrc, csrc )

                        i = ibuff( 1 )

                        nwin = ibuff( 2 )

                        pitraf = ibuff( 3 )

                        ks = ibuff( 4 )

                        pdtraf = ibuff( 5 )

                        ndtraf = ibuff( 6 )

                        ilen = ibuff( 7 )

                        dlen = ibuff( 8 )

                     ELSE

                        ilen = 0

                        dlen = 0

                        ks = -1

                     END IF

                  END IF

                  IF( nprow.GT.1 .AND. dir.EQ.2 .AND. mycol.EQ.csrc )

     $                 THEN

                     IF( first .OR. (lihi .GE. lilo + lsel) ) THEN

                        CALL igebr2d( ictxt, 'Col', top, 8, 1, ibuff, 8,

     $                       rsrc, csrc )

                        i = ibuff( 1 )

                        nwin = ibuff( 2 )

                        pitraf = ibuff( 3 )

                        ks = ibuff( 4 )

                        pdtraf = ibuff( 5 )

                        ndtraf = ibuff( 6 )

                        ilen = ibuff( 7 )

                        dlen = ibuff( 8 )

                     ELSE

                        ilen = 0

                        dlen = 0

                        ks = -1

                     END IF

                  END IF

               END IF

*

*              Broadcast the accumulated transformations - copy all

*              information from IWORK(IPIW:PITRAF-1) and

*              WORK(IPW2:PDTRAF-1) to a buffer and broadcast this

*              buffer in the corresponding block row and column.  On

*              arrival, copy the information back to the correct part of

*              the workspace. This step is avoided if no computations

*              were performed at the diagonal processor, i.e.,

*              BUFFLEN = 0.

*

               IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc ) THEN

                  buffer = pdtraf

                  bufflen = dlen + ilen

                  IF( bufflen.NE.0 ) THEN

                     DO 180 indx = 1, ilen

                        work( buffer+indx-1 ) =

     $                       float( iwork(ipiw+indx-1) )

 180                 CONTINUE

                     CALL slamov( 'All', dlen, 1, work( ipw2 ),

     $                    dlen, work(buffer+ilen), dlen )

                     IF( npcol.GT.1 .AND. dir.EQ.1 ) THEN

                        CALL sgebs2d( ictxt, 'Row', top, bufflen, 1,

     $                       work(buffer), bufflen )

                     END IF

                     IF( nprow.GT.1 .AND. dir.EQ.2 ) THEN

                        CALL sgebs2d( ictxt, 'Col', top, bufflen, 1,

     $                       work(buffer), bufflen )

                     END IF

                  END IF

               ELSEIF( myrow.EQ.rsrc .OR. mycol.EQ.csrc ) THEN

                  IF( npcol.GT.1 .AND. dir.EQ.1 .AND. myrow.EQ.rsrc )

     $                 THEN

                     buffer = pdtraf

                     bufflen = dlen + ilen

                     IF( bufflen.NE.0 ) THEN

                        CALL sgebr2d( ictxt, 'Row', top, bufflen, 1,

     $                       work(buffer), bufflen, rsrc, csrc )

                     END IF

                  END IF

                  IF( nprow.GT.1 .AND. dir.EQ.2 .AND. mycol.EQ.csrc )

     $                 THEN

                     buffer = pdtraf

                     bufflen = dlen + ilen

                     IF( bufflen.NE.0 ) THEN

                        CALL sgebr2d( ictxt, 'Col', top, bufflen, 1,

     $                       work(buffer), bufflen, rsrc, csrc )

                     END IF

                  END IF

                  IF((npcol.GT.1.AND.dir.EQ.1.AND.myrow.EQ.rsrc).OR.

     $                 (nprow.GT.1.AND.dir.EQ.2.AND.mycol.EQ.csrc ) )

     $                 THEN

                     IF( bufflen.NE.0 ) THEN

                        DO 190 indx = 1, ilen

                           iwork(ipiw+indx-1) =

     $                          int(work( buffer+indx-1 ))

 190                    CONTINUE

                        CALL slamov( 'All', dlen, 1,

     $                       work( buffer+ilen ), dlen,

     $                       work( ipw2 ), dlen )

                     END IF

                  END IF

               END IF

 111        CONTINUE

*

*           Now really perform the updates by applying the orthogonal

*           transformations to the out-of-window parts of T and Q. This

*           step is avoided if no reordering was performed by the on-

*           diagonal processor from the beginning, i.e., BUFFLEN = 0.

*

*           Count number of operations to decide whether to use

*           matrix-matrix multiplications for updating off-diagonal

*           parts or not.

*

            DO 112 window = 1, nmwin2

               rsrc = iwork(irsrc+window-1)

               csrc = iwork(icsrc+window-1)

*

               IF( (myrow.EQ.rsrc .AND. dir.EQ.1 ).OR.

     $              (mycol.EQ.csrc .AND. dir.EQ.2 ) ) THEN

                  lilo = iwork(ililo+window-1)

                  lihi = iwork(ilihi+window-1)

                  lsel = iwork(ilsel+window-1)

*

*                 Skip update part for current WINDOW if BUFFLEN = 0.

*

                  IF( bufflen.EQ.0 ) GO TO 295

*

                  nitraf = pitraf - ipiw

                  ishh = .false.

                  flops = 0

                  DO 200 k = 1, nitraf

                     IF( iwork( ipiw + k - 1 ).LE.nwin ) THEN

                        flops = flops + 6

                     ELSE

                        flops = flops + 11

                        ishh = .true.

                     END IF

 200              CONTINUE

*

*                 Compute amount of work space necessary for performing

*                 matrix-matrix multiplications.

*

                  pdw = buffer

                  ipw3 = pdw + nwin*nwin

               ELSE

                  flops = 0

               END IF

*

               IF( flops.NE.0 .AND.

     $              ( flops*100 ) / ( 2*nwin*nwin ) .GE. mmult ) THEN

*

*                 Update off-diagonal blocks and Q using matrix-matrix

*                 multiplications; if there are no Householder

*                 reflectors it is preferable to take the triangular

*                 block structure of the transformation matrix into

*                 account.

*

                  CALL slaset( 'All', nwin, nwin, zero, one,

     $                 work( pdw ), nwin )

                  CALL bslaapp( 1, nwin, nwin, ncb, work( pdw ), nwin,

     $                 nitraf, iwork(ipiw), work( ipw2 ), work(ipw3) )

*

                  IF( ishh ) THEN

*

*                    Loop through the local blocks of the distributed

*                    matrices T and Q and update them according to the

*                    performed reordering.

*

*                    Update the columns of T and Q affected by the

*                    reordering.

*

                     IF( dir.EQ.2 ) THEN

                        DO 210 indx = 1, i-1, nb

                           CALL infog2l( indx, i, desct, nprow, npcol,

     $                          myrow, mycol, iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )

     $                          THEN

                              lrows = min(nb,i-indx)

                              CALL sgemm( 'No transpose',

     $                             'No transpose', lrows, nwin, nwin,

     $                             one, t((jloc-1)*lldt+iloc), lldt,

     $                             work( pdw ), nwin, zero,

     $                             work(ipw3), lrows )

                              CALL slamov( 'All', lrows, nwin,

     $                             work(ipw3), lrows,

     $                             t((jloc-1)*lldt+iloc), lldt )

                           END IF

 210                    CONTINUE

                        IF( wantq ) THEN

                           DO 220 indx = 1, n, nb

                              CALL infog2l( indx, i, descq, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc1 )

                              IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )

     $                             THEN

                                 lrows = min(nb,n-indx+1)

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', lrows, nwin, nwin,

     $                                one, q((jloc-1)*lldq+iloc), lldq,

     $                                work( pdw ), nwin, zero,

     $                                work(ipw3), lrows )

                                 CALL slamov( 'All', lrows, nwin,

     $                                work(ipw3), lrows,

     $                                q((jloc-1)*lldq+iloc), lldq )

                              END IF

 220                       CONTINUE

                        END IF

                     END IF

*

*                    Update the rows of T affected by the reordering

*

                     IF( dir.EQ.1 ) THEN

                        IF( lihi.LT.n ) THEN

                           IF( mod(lihi,nb).GT.0 ) THEN

                              indx = lihi + 1

                              CALL infog2l( i, indx, desct, nprow,

     $                            npcol, myrow, mycol, iloc, jloc,

     $                            rsrc1, csrc1 )

                              IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )

     $                             THEN

                                 lcols = mod( min( nb-mod(lihi,nb),

     $                                n-lihi ), nb )

                                 CALL sgemm( 'Transpose',

     $                                'No Transpose', nwin, lcols, nwin,

     $                                one, work( pdw ), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt, zero,

     $                                work(ipw3), nwin )

                                 CALL slamov( 'All', nwin, lcols,

     $                                work(ipw3), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt )

                              END IF

                           END IF

                           indxs = iceil(lihi,nb)*nb + 1

                           DO 230 indx = indxs, n, nb

                              CALL infog2l( i, indx, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc1 )

                              IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )

     $                             THEN

                                 lcols = min( nb, n-indx+1 )

                                 CALL sgemm( 'Transpose',

     $                                'No Transpose', nwin, lcols, nwin,

     $                                one, work( pdw ), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt, zero,

     $                                work(ipw3), nwin )

                                 CALL slamov( 'All', nwin, lcols,

     $                                work(ipw3), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt )

                              END IF

 230                       CONTINUE

                        END IF

                     END IF

                  ELSE

*

*                    The NWIN-by-NWIN matrix U containing the

*                    accumulated orthogonal transformations has the

*                    following structure:

*

*                                  [ U11  U12 ]

*                              U = [          ],

*                                  [ U21  U22 ]

*

*                    where U21 is KS-by-KS upper triangular and U12 is

*                    (NWIN-KS)-by-(NWIN-KS) lower triangular.

*

*                    Update the columns of T and Q affected by the

*                    reordering.

*

*                    Compute T2*U21 + T1*U11 in workspace.

*

                     IF( dir.EQ.2 ) THEN

                        DO 240 indx = 1, i-1, nb

                           CALL infog2l( indx, i, desct, nprow, npcol,

     $                          myrow, mycol, iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )

     $                          THEN

                              jloc1 = indxg2l( i+nwin-ks, nb, mycol,

     $                             desct( csrc_ ), npcol )

                              lrows = min(nb,i-indx)

                              CALL slamov( 'All', lrows, ks,

     $                             t((jloc1-1)*lldt+iloc ), lldt,

     $                             work(ipw3), lrows )

                              CALL strmm( 'Right', 'Upper',

     $                              'No transpose',

     $                             'Non-unit', lrows, ks, one,

     $                             work( pdw+nwin-ks ), nwin,

     $                             work(ipw3), lrows )

                              CALL sgemm( 'No transpose',

     $                             'No transpose', lrows, ks, nwin-ks,

     $                             one, t((jloc-1)*lldt+iloc), lldt,

     $                             work( pdw ), nwin, one, work(ipw3),

     $                             lrows )

*

*                             Compute T1*U12 + T2*U22 in workspace.

*

                              CALL slamov( 'All', lrows, nwin-ks,

     $                             t((jloc-1)*lldt+iloc), lldt,

     $                             work( ipw3+ks*lrows ), lrows )

                              CALL strmm( 'Right', 'Lower',

     $                             'No transpose', 'Non-unit',

     $                             lrows, nwin-ks, one,

     $                             work( pdw+nwin*ks ), nwin,

     $                             work( ipw3+ks*lrows ), lrows )

                              CALL sgemm( 'No transpose',

     $                             'No transpose', lrows, nwin-ks, ks,

     $                             one, t((jloc1-1)*lldt+iloc), lldt,

     $                             work( pdw+nwin*ks+nwin-ks ), nwin,

     $                             one, work( ipw3+ks*lrows ), lrows )

*

*                             Copy workspace to T.

*

                              CALL slamov( 'All', lrows, nwin,

     $                             work(ipw3), lrows,

     $                             t((jloc-1)*lldt+iloc), lldt )

                           END IF

 240                    CONTINUE

                        IF( wantq ) THEN

*

*                          Compute Q2*U21 + Q1*U11 in workspace.

*

                           DO 250 indx = 1, n, nb

                              CALL infog2l( indx, i, descq, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc1 )

                              IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )

     $                             THEN

                                 jloc1 = indxg2l( i+nwin-ks, nb,

     $                                mycol, descq( csrc_ ), npcol )

                                 lrows = min(nb,n-indx+1)

                                 CALL slamov( 'All', lrows, ks,

     $                                q((jloc1-1)*lldq+iloc ), lldq,

     $                                work(ipw3), lrows )

                                 CALL strmm( 'Right', 'Upper',

     $                                'No transpose', 'Non-unit',

     $                                lrows, ks, one,

     $                                work( pdw+nwin-ks ), nwin,

     $                                work(ipw3), lrows )

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', lrows, ks,

     $                                nwin-ks, one,

     $                                q((jloc-1)*lldq+iloc), lldq,

     $                                work( pdw ), nwin, one,

     $                                work(ipw3), lrows )

*

*                                Compute Q1*U12 + Q2*U22 in workspace.

*

                                 CALL slamov( 'All', lrows, nwin-ks,

     $                                q((jloc-1)*lldq+iloc), lldq,

     $                                work( ipw3+ks*lrows ), lrows)

                                 CALL strmm( 'Right', 'Lower',

     $                                'No transpose', 'Non-unit',

     $                                lrows, nwin-ks, one,

     $                                work( pdw+nwin*ks ), nwin,

     $                                work( ipw3+ks*lrows ), lrows)

                                 CALL sgemm( 'No transpose',

     $                                'No transpose', lrows, nwin-ks,

     $                                ks, one, q((jloc1-1)*lldq+iloc),

     $                                lldq, work(pdw+nwin*ks+nwin-ks),

     $                                nwin, one, work( ipw3+ks*lrows ),

     $                                lrows )

*

*                                Copy workspace to Q.

*

                                 CALL slamov( 'All', lrows, nwin,

     $                                work(ipw3), lrows,

     $                                q((jloc-1)*lldq+iloc), lldq )

                              END IF

 250                       CONTINUE

                        END IF

                     END IF

*

                     IF( dir.EQ.1 ) THEN

                        IF ( lihi.LT.n ) THEN

*

*                          Compute U21**T*T2 + U11**T*T1 in workspace.

*

                           IF( mod(lihi,nb).GT.0 ) THEN

                              indx = lihi + 1

                              CALL infog2l( i, indx, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc1 )

                              IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )

     $                             THEN

                                 iloc1 = indxg2l( i+nwin-ks, nb, myrow,

     $                                desct( rsrc_ ), nprow )

                                 lcols = mod( min( nb-mod(lihi,nb),

     $                                n-lihi ), nb )

                                 CALL slamov( 'All', ks, lcols,

     $                                t((jloc-1)*lldt+iloc1), lldt,

     $                                work(ipw3), nwin )

                                 CALL strmm( 'Left', 'Upper',

     $                                'Transpose', 'Non-unit', ks,

     $                                lcols, one, work( pdw+nwin-ks ),

     $                                nwin, work(ipw3), nwin )

                                 CALL sgemm( 'Transpose',

     $                                'No transpose', ks, lcols,

     $                                nwin-ks, one, work(pdw), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt, one,

     $                                work(ipw3), nwin )

*

*                                Compute U12**T*T1 + U22**T*T2 in

*                                workspace.

*

                                 CALL slamov( 'All', nwin-ks, lcols,

     $                                t((jloc-1)*lldt+iloc), lldt,

     $                                work( ipw3+ks ), nwin )

                                 CALL strmm( 'Left', 'Lower',

     $                                'Transpose', 'Non-unit',

     $                                nwin-ks, lcols, one,

     $                                work( pdw+nwin*ks ), nwin,

     $                                work( ipw3+ks ), nwin )

                                 CALL sgemm( 'Transpose',

     $                                'No Transpose', nwin-ks, lcols,

     $                                ks, one,

     $                                work( pdw+nwin*ks+nwin-ks ),

     $                                nwin, t((jloc-1)*lldt+iloc1),

     $                                lldt, one, work( ipw3+ks ),

     $                                nwin )

*

*                                Copy workspace to T.

*

                                 CALL slamov( 'All', nwin, lcols,

     $                                work(ipw3), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt )

                              END IF

                           END IF

                           indxs = iceil(lihi,nb)*nb + 1

                           DO 260 indx = indxs, n, nb

                              CALL infog2l( i, indx, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc1, csrc1 )

                              IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc1 )

     $                             THEN

*

*                                Compute U21**T*T2 + U11**T*T1 in

*                                workspace.

*

                                 iloc1 = indxg2l( i+nwin-ks, nb,

     $                                myrow, desct( rsrc_ ), nprow )

                                 lcols = min( nb, n-indx+1 )

                                 CALL slamov( 'All', ks, lcols,

     $                                t((jloc-1)*lldt+iloc1), lldt,

     $                                work(ipw3), nwin )

                                 CALL strmm( 'Left', 'Upper',

     $                                'Transpose', 'Non-unit', ks,

     $                                lcols, one,

     $                                work( pdw+nwin-ks ), nwin,

     $                                work(ipw3), nwin )

                                 CALL sgemm( 'Transpose',

     $                                'No transpose', ks, lcols,

     $                                nwin-ks, one, work(pdw), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt, one,

     $                                work(ipw3), nwin )

*

*                                Compute U12**T*T1 + U22**T*T2 in

*                                workspace.

*

                                 CALL slamov( 'All', nwin-ks, lcols,

     $                                t((jloc-1)*lldt+iloc), lldt,

     $                                work( ipw3+ks ), nwin )

                                 CALL strmm( 'Left', 'Lower',

     $                                'Transpose', 'Non-unit',

     $                                nwin-ks, lcols, one,

     $                                work( pdw+nwin*ks ), nwin,

     $                                work( ipw3+ks ), nwin )

                                 CALL sgemm( 'Transpose',

     $                                'No Transpose', nwin-ks, lcols,

     $                                ks, one,

     $                                work( pdw+nwin*ks+nwin-ks ),

     $                                nwin, t((jloc-1)*lldt+iloc1),

     $                                lldt, one, work(ipw3+ks), nwin )

*

*                                Copy workspace to T.

*

                                 CALL slamov( 'All', nwin, lcols,

     $                                work(ipw3), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt )

                              END IF

 260                       CONTINUE

                        END IF

                     END IF

                  END IF

               ELSEIF( flops.NE.0 ) THEN

*

*                 Update off-diagonal blocks and Q using the pipelined

*                 elementary transformations.

*

                  IF( dir.EQ.2 ) THEN

                     DO 270 indx = 1, i-1, nb

                        CALL infog2l( indx, i, desct, nprow, npcol,

     $                       myrow, mycol, iloc, jloc, rsrc1, csrc1 )

                        IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                           lrows = min(nb,i-indx)

                           CALL bslaapp( 1, lrows, nwin, ncb,

     $                          t((jloc-1)*lldt+iloc ), lldt, nitraf,

     $                          iwork(ipiw), work( ipw2 ),

     $                          work(ipw3) )

                        END IF

 270                 CONTINUE

                     IF( wantq ) THEN

                        DO 280 indx = 1, n, nb

                           CALL infog2l( indx, i, descq, nprow, npcol,

     $                          myrow, mycol, iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )

     $                          THEN

                              lrows = min(nb,n-indx+1)

                              CALL bslaapp( 1, lrows, nwin, ncb,

     $                             q((jloc-1)*lldq+iloc), lldq, nitraf,

     $                             iwork(ipiw), work( ipw2 ),

     $                             work(ipw3) )

                           END IF

 280                    CONTINUE

                     END IF

                  END IF

                  IF( dir.EQ.1 ) THEN

                     IF( lihi.LT.n ) THEN

                        IF( mod(lihi,nb).GT.0 ) THEN

                           indx = lihi + 1

                           CALL infog2l( i, indx, desct, nprow, npcol,

     $                          myrow, mycol, iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )

     $                          THEN

                              lcols = mod( min( nb-mod(lihi,nb),

     $                             n-lihi ), nb )

                              CALL bslaapp( 0, nwin, lcols, ncb,

     $                             t((jloc-1)*lldt+iloc), lldt, nitraf,

     $                             iwork(ipiw), work( ipw2 ),

     $                             work(ipw3) )

                           END IF

                        END IF

                        indxs = iceil(lihi,nb)*nb + 1

                        DO 290 indx = indxs, n, nb

                           CALL infog2l( i, indx, desct, nprow, npcol,

     $                          myrow, mycol, iloc, jloc, rsrc1, csrc1 )

                           IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 )

     $                          THEN

                              lcols = min( nb, n-indx+1 )

                              CALL bslaapp( 0, nwin, lcols, ncb,

     $                             t((jloc-1)*lldt+iloc), lldt, nitraf,

     $                             iwork(ipiw), work( ipw2 ),

     $                             work(ipw3) )

                           END IF

 290                    CONTINUE

                     END IF

                  END IF

               END IF

*

*              If I was not involved in the updates for the current

*              window or the window was fully processed, I go here and

*              try again for the next window.

*

 295           CONTINUE

*

*              Update LIHI and LILO depending on the number of

*              eigenvalues really moved - for on-diagonal processes we

*              do this update only once since each on-diagonal process

*              is only involved with one window at one time. The

*              indices are updated in three cases:

*                1) When some reordering was really performed

*                   -- indicated by BUFFLEN > 0.

*                2) When no selected eigenvalues was found in the

*                   current window -- indicated by KS = 0.

*                3) When some selected eigenvalues was found in the

*                   current window but no one of them was moved

*                   (KS > 0 and BUFFLEN = 0)

*              False index updating is avoided by sometimes setting

*              KS = -1. This will affect processors involved in more

*              than one window and where the first one ends up with

*              KS = 0 and for the second one is done already.

*

               IF( myrow.EQ.rsrc.AND.mycol.EQ.csrc ) THEN

                  IF( dir.EQ.2 ) THEN

                     IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.

     $                    ( bufflen.EQ.0 .AND. ks.GT.0 ) )

     $                  lihi = i + ks - 1

                     iwork( ilihi+window-1 ) = lihi

                     IF( .NOT. lihi.GE.lilo+lsel ) THEN

                        lilo = lilo + lsel

                        iwork( ililo+window-1 ) = lilo

                     END IF

                  END IF

               ELSEIF( myrow.EQ.rsrc .AND. dir.EQ.1 ) THEN

                  IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.

     $                 ( bufflen.EQ.0 .AND. ks.GT.0 ) )

     $               lihi = i + ks - 1

                  iwork( ilihi+window-1 ) = lihi

                  IF( .NOT. lihi.GE.lilo+lsel ) THEN

                     lilo = lilo + lsel

                     iwork( ililo+window-1 ) = lilo

                  END IF

               ELSEIF( mycol.EQ.csrc .AND. dir.EQ.2 ) THEN

                  IF( bufflen.NE.0 .OR. ks.EQ.0 .OR.

     $                 ( bufflen.EQ.0 .AND. ks.GT.0 ) )

     $               lihi = i + ks - 1

                  iwork( ilihi+window-1 ) = lihi

                  IF( .NOT. lihi.GE.lilo+lsel ) THEN

                     lilo = lilo + lsel

                     iwork( ililo+window-1 ) = lilo

                  END IF

               END IF

*

 112        CONTINUE

*

*           End of direction loop for updates with respect to local

*           reordering.

*

 1111       CONTINUE

*

*           Associate each process with one of the corresponding

*           computational windows such that the test for another round

*           of local reordering is carried out properly. Since the

*           column updates were computed after the row updates, it is

*           sufficient to test for changing the association to the

*           window in the corresponding process row.

*

            DO 113 window = 1, nmwin2

               rsrc = iwork( irsrc + window - 1 )

               IF( myrow.EQ.rsrc .AND. (.NOT. lihi.GE.lilo+lsel ) ) THEN

                  lilo = iwork( ililo + window - 1 )

                  lihi = iwork( ilihi + window - 1 )

                  lsel = iwork( ilsel + window - 1 )

               END IF

 113        CONTINUE

*

*           End While ( LIHI >= LILO + LSEL )

            round = round + 1

            IF( first ) first = .false.

            GO TO 130

         END IF

*

*        All processors excluded from the local reordering go here.

*

 114     CONTINUE

*

*        Barrier to collect the processes before proceeding.

*

         CALL blacs_barrier( ictxt, 'All' )

*

*        Compute global maximum of IERR so that we know if some process

*        experienced a failure in the reordering.

*

         myierr = ierr

         IF( nprocs.GT.1 ) THEN

            CALL igamx2d( ictxt, 'All', top, 1, 1, ierr, 1, -1,

     $           -1, -1, -1, -1 )

         END IF

*

         IF( ierr.NE.0 ) THEN

*

*           When calling BSTREXC, the block at position I+KKS-1 failed

*           to swap.

*

            IF( myierr.NE.0 ) info = max(1,i+kks-1)

            IF( nprocs.GT.1 ) THEN

               CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, -1,

     $              -1, -1, -1, -1 )

            END IF

            GO TO 300

         END IF

*

*        Now, for each computational window, move the selected

*        eigenvalues across the process border. Do this by forming the

*        processors into groups of four working together to bring the

*        window over the border. The processes are numbered as follows

*

*                1 | 2

*                --+--

*                3 | 4

*

*        where '|' and '-' denotes the process (and block) borders.

*        This implies that the cluster to be reordered over the border

*        is held by process 4, process 1 will receive the cluster after

*        the reordering, process 3 holds the local (2,1)th element of a

*        2-by-2 diagonal block located on the block border and process 2

*        holds the closest off-diagonal part of the window that is

*        affected by the cross-border reordering.

*

*        The active window is now ( I : LIHI[4], I : LIHI[4] ), where

*        I = MAX( ILO, LIHI - 2*MOD(LIHI,NB) ). If this active window is

*        too large compared to the value of PARA( 6 ), it will be

*        truncated in both ends such that a maximum of PARA( 6 )

*        eigenvalues is reordered across the border this time.

*

*        The active window will be collected and built in workspace at

*        process 1 and 4, which both compute the reordering and return

*        the updated parts to the corresponding processes 2-3. Next, the

*        accumulated transformations are broadcast for updates in the

*        block rows and column that corresponds to the process rows and

*        columns where process 1 and 4 reside.

*

*        The off-diagonal blocks are updated by the processes receiving

*        from the broadcasts of the orthogonal transformations. Since

*        the active window is split over the process borders, the

*        updates of T and Q require that stripes of block rows or

*        columns are exchanged between neighboring processes in the

*        corresponding process rows and columns.

*

*        First, form each group of processors involved in the

*        crossborder reordering. Do this in two (or three) phases:

*        1) Reorder each odd window over the border.

*        2) Reorder each even window over the border.

*        3) Reorder the last odd window over the border, if it was not

*           processed in the first phase.

*

*        When reordering the odd windows over the border, we must make

*        sure that no process row or column is involved in both the

*        first and the last window at the same time. This happens when

*        the total number of windows is odd, greater than one and equal

*        to the minimum process mesh dimension. Therefore the last odd

*        window may be reordered over the border at last.

*

         lastwait = nmwin2.GT.1 .AND. mod(nmwin2,2).EQ.1 .AND.

     $        nmwin2.EQ.min(nprow,npcol)

*

         last = 0

 308     CONTINUE

         IF( lastwait ) THEN

            IF( last.EQ.0 ) THEN

               win0s = 1

               win0e = 2

               wine = nmwin2 - 1

            ELSE

               win0s = nmwin2

               win0e = nmwin2

               wine = nmwin2

            END IF

         ELSE

            win0s = 1

            win0e = 2

            wine = nmwin2

         END IF

         DO 310 window0 = win0s, win0e

            DO 320 window = window0, wine, 2

*

*              Define the process holding the down-right part of the

*              window.

*

               rsrc4 = iwork(irsrc+window-1)

               csrc4 = iwork(icsrc+window-1)

*

*              Define the other processes in the group of four.

*

               rsrc3 = rsrc4

               csrc3 = mod( csrc4 - 1 + npcol, npcol )

               rsrc2 = mod( rsrc4 - 1 + nprow, nprow )

               csrc2 = csrc4

               rsrc1 = rsrc2

               csrc1 = csrc3

               IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $             ( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) .OR.

     $             ( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) .OR.

     $             ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

*

*                 Compute the correct active window - for reordering

*                 into a block that has not been active at all before,

*                 we try to reorder as many of our eigenvalues over the

*                 border as possible without knowing of the situation on

*                 the other side - this may cause very few eigenvalues

*                 to be reordered over the border this time (perhaps not

*                 any) but this should be an initial problem.  Anyway,

*                 the bottom-right position of the block will be at

*                 position LIHIC.

*

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     lihi4 = ( iwork( ililo + window - 1 ) +

     $                    iwork( ilihi + window - 1 ) ) / 2

                     lihic = min(lihi4,(iceil(lihi4,nb)-1)*nb+wneicr)

*

*                    Fix LIHIC to avoid that bottom of window cuts

*                    2-by-2 block and make sure all processors in the

*                    group knows about the correct value.

*

                     IF( (.NOT. lihic.LE.nb) .AND. lihic.LT.n ) THEN

                        iloc = indxg2l( lihic+1, nb, myrow,

     $                       desct( rsrc_ ), nprow )

                        jloc = indxg2l( lihic, nb, mycol,

     $                       desct( csrc_ ), npcol )

                        IF( t( (jloc-1)*lldt+iloc ).NE.zero ) THEN

                           IF( mod( lihic, nb ).EQ.1 .OR.

     $                          ( mod( lihic, nb ).EQ.2 .AND.

     $                          SELECT(lihic-2).EQ.0 ) )

     $                          THEN

                              lihic = lihic + 1

                           ELSE

                              lihic = lihic - 1

                           END IF

                        END IF

                     END IF

                     IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )

     $                  CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc1,

     $                       csrc1 )

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )

     $                  CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc2,

     $                       csrc2 )

                     IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )

     $                  CALL igesd2d( ictxt, 1, 1, lihic, 1, rsrc3,

     $                       csrc3 )

                  END IF

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 )

     $                  CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,

     $                       csrc4 )

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 )

     $                  CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,

     $                       csrc4 )

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 )

     $                  CALL igerv2d( ictxt, 1, 1, lihic, 1, rsrc4,

     $                       csrc4 )

                  END IF

*

*                 Avoid going over the border with the first window if

*                 it resides in the block where the last global position

*                 T(ILO,ILO) is or ILO has been updated to point to a

*                 position right of T(LIHIC,LIHIC).

*

                  skip1cr = window.EQ.1 .AND.

     $                 iceil(lihic,nb).LE.iceil(ilo,nb)

*

*                 Decide I, where to put top of window, such that top of

*                 window does not cut 2-by-2 block. Make sure that we do

*                 not end up in a situation where a 2-by-2 block

*                 split on the border is left in its original place

*                 -- this can cause infinite loops.

*                 Remedy: make sure that the part of the window that

*                 resides left to the border is at least of dimension

*                 two (2) in case we have 2-by-2 blocks in top of the

*                 cross border window.

*

*                 Also make sure all processors in the group knows about

*                 the correct value of I. When skipping the crossborder

*                 reordering, just set I = LIHIC.

*

                  IF( .NOT. skip1cr ) THEN

                     IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                        IF( window.EQ.1 ) THEN

                           lihi1 = ilo

                        ELSE

                           lihi1 = iwork( ilihi + window - 2 )

                        END IF

                        i = max( lihi1,

     $                       min( lihic-2*mod(lihic,nb) + 1,

     $                       (iceil(lihic,nb)-1)*nb - 1  ) )

                        iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),

     $                       nprow )

                        jloc = indxg2l( i-1, nb, mycol, desct( csrc_ ),

     $                       npcol )

                        IF( t( (jloc-1)*lldt+iloc ).NE.zero )

     $                     i = i - 1

                        IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )

     $                     CALL igesd2d( ictxt, 1, 1, i, 1, rsrc4,

     $                          csrc4 )

                        IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )

     $                     CALL igesd2d( ictxt, 1, 1, i, 1, rsrc2,

     $                          csrc2 )

                        IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )

     $                     CALL igesd2d( ictxt, 1, 1, i, 1, rsrc3,

     $                          csrc3 )

                     END IF

                     IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                        IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 )

     $                     CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,

     $                          csrc1 )

                     END IF

                     IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                        IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 )

     $                     CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,

     $                          csrc1 )

                     END IF

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 )

     $                     CALL igerv2d( ictxt, 1, 1, i, 1, rsrc1,

     $                          csrc1 )

                     END IF

                  ELSE

                     i = lihic

                  END IF

*

*                 Finalize computation of window size: active window is

*                 now (I:LIHIC,I:LIHIC).

*

                  nwin = lihic - i + 1

                  ks = 0

*

*                 Skip rest of this part if appropriate.

*

                  IF( skip1cr ) GO TO 360

*

*                 Divide workspace -- put active window in

*                 WORK(IPW2:IPW2+NWIN**2-1) and orthogonal

*                 transformations in WORK(IPW3:...).

*

                  CALL slaset( 'All', nwin, nwin, zero, zero,

     $                 work( ipw2 ), nwin )

*

                  pitraf = ipiw

                  ipw3 = ipw2 + nwin*nwin

                  pdtraf = ipw3

*

*                 Exchange the current view of SELECT for the active

*                 window between process 1 and 4 to make sure that

*                 exactly the same job is performed for both processes.

*

                  IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN

                     ilen4 = mod(lihic,nb)

                     seli4 = iceil(i,nb)*nb+1

                     ilen1 = nwin - ilen4

                     IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                        CALL igesd2d( ictxt, ilen1, 1, SELECT(i),

     $                       ilen1, rsrc4, csrc4 )

                        CALL igerv2d( ictxt, ilen4, 1, SELECT(seli4),

     $                       ilen4, rsrc4, csrc4 )

                     END IF

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        CALL igesd2d( ictxt, ilen4, 1, SELECT(seli4),

     $                       ilen4, rsrc1, csrc1 )

                        CALL igerv2d( ictxt, ilen1, 1, SELECT(i),

     $                       ilen1, rsrc1, csrc1 )

                     END IF

                  END IF

*

*                 Form the active window by a series of point-to-point

*                 sends and receives.

*

                  dim1 = nb - mod(i-1,nb)

                  dim4 = nwin - dim1

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),

     $                    nprow )

                     jloc = indxg2l( i, nb, mycol, desct( csrc_ ),

     $                    npcol )

                     CALL slamov( 'All', dim1, dim1,

     $                    t((jloc-1)*lldt+iloc), lldt, work(ipw2),

     $                    nwin )

                     IF( rsrc1.NE.rsrc4 .OR. csrc1.NE.csrc4 ) THEN

                        CALL sgesd2d( ictxt, dim1, dim1,

     $                       work(ipw2), nwin, rsrc4, csrc4 )

                        CALL sgerv2d( ictxt, dim4, dim4,

     $                       work(ipw2+dim1*nwin+dim1), nwin, rsrc4,

     $                       csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     iloc = indxg2l( i+dim1, nb, myrow, desct( rsrc_ ),

     $                    nprow )

                     jloc = indxg2l( i+dim1, nb, mycol, desct( csrc_ ),

     $                    npcol )

                     CALL slamov( 'All', dim4, dim4,

     $                    t((jloc-1)*lldt+iloc), lldt,

     $                    work(ipw2+dim1*nwin+dim1), nwin )

                     IF( rsrc4.NE.rsrc1 .OR. csrc4.NE.csrc1 ) THEN

                        CALL sgesd2d( ictxt, dim4, dim4,

     $                       work(ipw2+dim1*nwin+dim1), nwin, rsrc1,

     $                       csrc1 )

                        CALL sgerv2d( ictxt, dim1, dim1,

     $                       work(ipw2), nwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),

     $                    nprow )

                     jloc = indxg2l( i+dim1, nb, mycol, desct( csrc_ ),

     $                    npcol )

                     CALL slamov( 'All', dim1, dim4,

     $                    t((jloc-1)*lldt+iloc), lldt,

     $                    work(ipw2+dim1*nwin), nwin )

                     IF( rsrc2.NE.rsrc1 .OR. csrc2.NE.csrc1 ) THEN

                        CALL sgesd2d( ictxt, dim1, dim4,

     $                       work(ipw2+dim1*nwin), nwin, rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN

                        CALL sgesd2d( ictxt, dim1, dim4,

     $                       work(ipw2+dim1*nwin), nwin, rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     iloc = indxg2l( i+dim1, nb, myrow, desct( rsrc_ ),

     $                    nprow )

                     jloc = indxg2l( i+dim1-1, nb, mycol,

     $                    desct( csrc_ ), npcol )

                     CALL slamov( 'All', 1, 1,

     $                    t((jloc-1)*lldt+iloc), lldt,

     $                    work(ipw2+(dim1-1)*nwin+dim1), nwin )

                     IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN

                        CALL sgesd2d( ictxt, 1, 1,

     $                       work(ipw2+(dim1-1)*nwin+dim1), nwin,

     $                       rsrc1, csrc1 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     IF( rsrc3.NE.rsrc4 .OR. csrc3.NE.csrc4 ) THEN

                        CALL sgesd2d( ictxt, 1, 1,

     $                       work(ipw2+(dim1-1)*nwin+dim1), nwin,

     $                       rsrc4, csrc4 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc1.NE.rsrc2 .OR. csrc1.NE.csrc2 ) THEN

                        CALL sgerv2d( ictxt, dim1, dim4,

     $                       work(ipw2+dim1*nwin), nwin, rsrc2,

     $                       csrc2 )

                     END IF

                     IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN

                        CALL sgerv2d( ictxt, 1, 1,

     $                       work(ipw2+(dim1-1)*nwin+dim1), nwin,

     $                       rsrc3, csrc3 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN

                        CALL sgerv2d( ictxt, dim1, dim4,

     $                       work(ipw2+dim1*nwin), nwin, rsrc2,

     $                       csrc2 )

                     END IF

                     IF( rsrc4.NE.rsrc3 .OR. csrc4.NE.csrc3 ) THEN

                        CALL sgerv2d( ictxt, 1, 1,

     $                       work(ipw2+(dim1-1)*nwin+dim1), nwin,

     $                       rsrc3, csrc3 )

                     END IF

                  END IF

*

*                 Compute the reordering (just as in the total local

*                 case) and accumulate the transformations (ONLY

*                 ON-DIAGONAL PROCESSES).

*

                  IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $                ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

                     pair = .false.

                     DO 330 k = i, lihic

                        IF( pair ) THEN

                           pair = .false.

                        ELSE

                           swap = SELECT( k ).NE.0

                           IF( k.LT.lihic ) THEN

                              elem = work(ipw2+(k-i)*nwin+k-i+1)

                              IF( elem.NE.zero )

     $                           pair = .true.

                           END IF

                           IF( swap ) THEN

                              ks = ks + 1

*

*                             Swap the K-th block to position I+KS-1.

*

                              ierr = 0

                              kk  = k - i + 1

                              kks = ks

                              IF( kk.NE.ks ) THEN

                                 nitraf = liwork - pitraf + 1

                                 ndtraf = lwork - pdtraf + 1

                                 CALL bstrexc( nwin, work(ipw2), nwin,

     $                                kk, kks, nitraf, iwork( pitraf ),

     $                                ndtraf, work( pdtraf ),

     $                                work(ipw1), ierr )

                                 pitraf = pitraf + nitraf

                                 pdtraf = pdtraf + ndtraf

*

*                                Update array SELECT.

*

                                 IF ( pair ) THEN

                                    DO 340 j = i+kk-1, i+kks, -1

                                       SELECT(j+1) = SELECT(j-1)

 340                                CONTINUE

                                    SELECT(i+kks-1) = 1

                                    SELECT(i+kks) = 1

                                 ELSE

                                    DO 350 j = i+kk-1, i+kks, -1

                                       SELECT(j) = SELECT(j-1)

 350                                CONTINUE

                                    SELECT(i+kks-1) = 1

                                 END IF

*

                                 IF ( ierr.EQ.1 .OR. ierr.EQ.2 ) THEN

*

                                    IF ( ierr.EQ.2 ) THEN

                                       SELECT( i+kks-3 ) = 1

                                       SELECT( i+kks-1 ) = 0

                                       kks = kks + 1

                                    END IF

*

                                    GO TO 360

                                 END IF

                                 ks = kks

                              END IF

                              IF( pair )

     $                           ks = ks + 1

                           END IF

                        END IF

 330                 CONTINUE

                  END IF

 360              CONTINUE

*

*                 Save information about the reordering.

*

                  IF( ( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) .OR.

     $                 ( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) ) THEN

                     ibuff( 1 ) = i

                     ibuff( 2 ) = nwin

                     ibuff( 3 ) = pitraf

                     ibuff( 4 ) = ks

                     ibuff( 5 ) = pdtraf

                     ibuff( 6 ) = ndtraf

                     ilen = pitraf - ipiw + 1

                     dlen = pdtraf - ipw3 + 1

                     ibuff( 7 ) = ilen

                     ibuff( 8 ) = dlen

*

*                    Put reordered data back into global matrix if a

*                    reordering took place.

*

                     IF( .NOT. skip1cr ) THEN

                        IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                           iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),

     $                          nprow )

                           jloc = indxg2l( i, nb, mycol, desct( csrc_ ),

     $                          npcol )

                           CALL slamov( 'All', dim1, dim1, work(ipw2),

     $                          nwin, t((jloc-1)*lldt+iloc), lldt )

                        END IF

                        IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                           iloc = indxg2l( i+dim1, nb, myrow,

     $                          desct( rsrc_ ), nprow )

                           jloc = indxg2l( i+dim1, nb, mycol,

     $                          desct( csrc_ ), npcol )

                           CALL slamov( 'All', dim4, dim4,

     $                          work(ipw2+dim1*nwin+dim1), nwin,

     $                          t((jloc-1)*lldt+iloc), lldt )

                        END IF

                     END IF

                  END IF

*

*                 Break if appropriate -- IBUFF(3:8) may now contain

*                 nonsense, but that's fine. The processors outside

*                 the cross border group only needs to know about I and

*                 NWIN to get a correct value of SKIP1CR (see below) and

*                 to skip the cross border updates if necessary.

*

                  IF( window.EQ.1 .AND. skip1cr ) GO TO 325

*

*                 Return reordered data to process 2 and 3.

*

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( rsrc1.NE.rsrc3 .OR. csrc1.NE.csrc3 ) THEN

                        CALL sgesd2d( ictxt, 1, 1,

     $                       work( ipw2+(dim1-1)*nwin+dim1 ), nwin,

     $                       rsrc3, csrc3 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                     IF( rsrc4.NE.rsrc2 .OR. csrc4.NE.csrc2 ) THEN

                        CALL sgesd2d( ictxt, dim1, dim4,

     $                       work( ipw2+dim1*nwin), nwin, rsrc2,

     $                       csrc2 )

                     END IF

                  END IF

                  IF( myrow.EQ.rsrc2 .AND. mycol.EQ.csrc2 ) THEN

                     iloc = indxg2l( i, nb, myrow, desct( rsrc_ ),

     $                    nprow )

                     jloc = indxg2l( i+dim1, nb, mycol,

     $                    desct( csrc_ ), npcol )

                     IF( rsrc2.NE.rsrc4 .OR. csrc2.NE.csrc4 ) THEN

                        CALL sgerv2d( ictxt, dim1, dim4,

     $                       work(ipw2+dim1*nwin), nwin, rsrc4, csrc4 )

                     END IF

                     CALL slamov( 'All', dim1, dim4,

     $                    work( ipw2+dim1*nwin ), nwin,

     $                    t((jloc-1)*lldt+iloc), lldt )

                  END IF

                  IF( myrow.EQ.rsrc3 .AND. mycol.EQ.csrc3 ) THEN

                     iloc = indxg2l( i+dim1, nb, myrow,

     $                    desct( rsrc_ ), nprow )

                     jloc = indxg2l( i+dim1-1, nb, mycol,

     $                    desct( csrc_ ), npcol )

                     IF( rsrc3.NE.rsrc1 .OR. csrc3.NE.csrc1 ) THEN

                        CALL sgerv2d( ictxt, 1, 1,

     $                       work( ipw2+(dim1-1)*nwin+dim1 ), nwin,

     $                       rsrc1, csrc1 )

                     END IF

                     t((jloc-1)*lldt+iloc) =

     $                    work( ipw2+(dim1-1)*nwin+dim1 )

                  END IF

               END IF

*

 325           CONTINUE

*

 320        CONTINUE

*

*           For the crossborder updates, we use the same directions as

*           in the local reordering case above.

*

            DO 2222 dir = 1, 2

*

*              Broadcast information about the reordering.

*

               DO 321 window = window0, wine, 2

                  rsrc4 = iwork(irsrc+window-1)

                  csrc4 = iwork(icsrc+window-1)

                  rsrc1 = mod( rsrc4 - 1 + nprow, nprow )

                  csrc1 = mod( csrc4 - 1 + npcol, npcol )

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     IF( npcol.GT.1 .AND. dir.EQ.1 )

     $                  CALL igebs2d( ictxt, 'Row', top, 8, 1,

     $                       ibuff, 8 )

                     IF( nprow.GT.1 .AND. dir.EQ.2 )

     $                  CALL igebs2d( ictxt, 'Col', top, 8, 1,

     $                       ibuff, 8 )

                     skip1cr = window.EQ.1 .AND.

     $                    iceil(lihic,nb).LE.iceil(ilo,nb)

                  ELSEIF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 ) THEN

                     IF( npcol.GT.1 .AND. dir.EQ.1 .AND.

     $                    myrow.EQ.rsrc1 ) THEN

                        CALL igebr2d( ictxt, 'Row', top, 8, 1,

     $                       ibuff, 8, rsrc1, csrc1 )

                        i = ibuff( 1 )

                        nwin = ibuff( 2 )

                        pitraf = ibuff( 3 )

                        ks = ibuff( 4 )

                        pdtraf = ibuff( 5 )

                        ndtraf = ibuff( 6 )

                        ilen = ibuff( 7 )

                        dlen = ibuff( 8 )

                        bufflen = ilen + dlen

                        ipw3 = ipw2 + nwin*nwin

                        dim1 = nb - mod(i-1,nb)

                        dim4 = nwin - dim1

                        lihic = nwin + i - 1

                        skip1cr = window.EQ.1 .AND.

     $                       iceil(lihic,nb).LE.iceil(ilo,nb)

                     END IF

                     IF( nprow.GT.1 .AND. dir.EQ.2 .AND.

     $                    mycol.EQ.csrc1 ) THEN

                        CALL igebr2d( ictxt, 'Col', top, 8, 1,

     $                       ibuff, 8, rsrc1, csrc1 )

                        i = ibuff( 1 )

                        nwin = ibuff( 2 )

                        pitraf = ibuff( 3 )

                        ks = ibuff( 4 )

                        pdtraf = ibuff( 5 )

                        ndtraf = ibuff( 6 )

                        ilen = ibuff( 7 )

                        dlen = ibuff( 8 )

                        bufflen = ilen + dlen

                        ipw3 = ipw2 + nwin*nwin

                        dim1 = nb - mod(i-1,nb)

                        dim4 = nwin - dim1

                        lihic = nwin + i - 1

                        skip1cr = window.EQ.1 .AND.

     $                       iceil(lihic,nb).LE.iceil(ilo,nb)

                     END IF

                  END IF

                  IF( rsrc1.NE.rsrc4 ) THEN

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        IF( npcol.GT.1 .AND. dir.EQ.1 )

     $                     CALL igebs2d( ictxt, 'Row', top, 8, 1,

     $                          ibuff, 8 )

                        skip1cr = window.EQ.1 .AND.

     $                       iceil(lihic,nb).LE.iceil(ilo,nb)

                     ELSEIF( myrow.EQ.rsrc4 ) THEN

                        IF( npcol.GT.1 .AND. dir.EQ.1 ) THEN

                           CALL igebr2d( ictxt, 'Row', top, 8, 1,

     $                          ibuff, 8, rsrc4, csrc4 )

                           i = ibuff( 1 )

                           nwin = ibuff( 2 )

                           pitraf = ibuff( 3 )

                           ks = ibuff( 4 )

                           pdtraf = ibuff( 5 )

                           ndtraf = ibuff( 6 )

                           ilen = ibuff( 7 )

                           dlen = ibuff( 8 )

                           bufflen = ilen + dlen

                           ipw3 = ipw2 + nwin*nwin

                           dim1 = nb - mod(i-1,nb)

                           dim4 = nwin - dim1

                           lihic = nwin + i - 1

                           skip1cr = window.EQ.1 .AND.

     $                          iceil(lihic,nb).LE.iceil(ilo,nb)

                        END IF

                     END IF

                  END IF

                  IF( csrc1.NE.csrc4 ) THEN

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        IF( nprow.GT.1 .AND. dir.EQ.2 )

     $                     CALL igebs2d( ictxt, 'Col', top, 8, 1,

     $                          ibuff, 8 )

                        skip1cr = window.EQ.1 .AND.

     $                       iceil(lihic,nb).LE.iceil(ilo,nb)

                     ELSEIF( mycol.EQ.csrc4 ) THEN

                        IF( nprow.GT.1 .AND. dir.EQ.2 ) THEN

                           CALL igebr2d( ictxt, 'Col', top, 8, 1,

     $                          ibuff, 8, rsrc4, csrc4 )

                           i = ibuff( 1 )

                           nwin = ibuff( 2 )

                           pitraf = ibuff( 3 )

                           ks = ibuff( 4 )

                           pdtraf = ibuff( 5 )

                           ndtraf = ibuff( 6 )

                           ilen = ibuff( 7 )

                           dlen = ibuff( 8 )

                           bufflen = ilen + dlen

                           ipw3 = ipw2 + nwin*nwin

                           dim1 = nb - mod(i-1,nb)

                           dim4 = nwin - dim1

                           lihic = nwin + i - 1

                           skip1cr = window.EQ.1 .AND.

     $                          iceil(lihic,nb).LE.iceil(ilo,nb)

                        END IF

                     END IF

                  END IF

*

*                 Skip rest of broadcasts and updates if appropriate.

*

                  IF( skip1cr ) GO TO 326

*

*                 Broadcast the orthogonal transformations.

*

                  IF( myrow.EQ.rsrc1 .AND. mycol.EQ.csrc1 ) THEN

                     buffer = pdtraf

                     bufflen = dlen + ilen

                     IF( (nprow.GT.1 .AND. dir.EQ.2) .OR.

     $                   (npcol.GT.1 .AND. dir.EQ.1) ) THEN

                        DO 370 indx = 1, ilen

                           work( buffer+indx-1 ) =

     $                          float( iwork(ipiw+indx-1) )

 370                    CONTINUE

                        CALL slamov( 'All', dlen, 1, work( ipw3 ),

     $                       dlen, work(buffer+ilen), dlen )

                     END IF

                     IF( npcol.GT.1 .AND. dir.EQ.1 ) THEN

                        CALL sgebs2d( ictxt, 'Row', top, bufflen, 1,

     $                       work(buffer), bufflen )

                     END IF

                     IF( nprow.GT.1 .AND. dir.EQ.2 ) THEN

                        CALL sgebs2d( ictxt, 'Col', top, bufflen, 1,

     $                       work(buffer), bufflen )

                     END IF

                  ELSEIF( myrow.EQ.rsrc1 .OR. mycol.EQ.csrc1 ) THEN

                     IF( npcol.GT.1 .AND. dir.EQ.1 .AND.

     $                    myrow.EQ.rsrc1 ) THEN

                        buffer = pdtraf

                        bufflen = dlen + ilen

                        CALL sgebr2d( ictxt, 'Row', top, bufflen, 1,

     $                       work(buffer), bufflen, rsrc1, csrc1 )

                     END IF

                     IF( nprow.GT.1 .AND. dir.EQ.2 .AND.

     $                    mycol.EQ.csrc1 ) THEN

                        buffer = pdtraf

                        bufflen = dlen + ilen

                        CALL sgebr2d( ictxt, 'Col', top, bufflen, 1,

     $                       work(buffer), bufflen, rsrc1, csrc1 )

                     END IF

                     IF( (npcol.GT.1.AND.dir.EQ.1.AND.myrow.EQ.rsrc1)

     $                    .OR. (nprow.GT.1.AND.dir.EQ.2.AND.

     $                    mycol.EQ.csrc1) ) THEN

                        DO 380 indx = 1, ilen

                           iwork(ipiw+indx-1) =

     $                          int( work( buffer+indx-1 ) )

 380                    CONTINUE

                        CALL slamov( 'All', dlen, 1,

     $                       work( buffer+ilen ), dlen,

     $                       work( ipw3 ), dlen )

                     END IF

                  END IF

                  IF( rsrc1.NE.rsrc4 ) THEN

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        buffer = pdtraf

                        bufflen = dlen + ilen

                        IF( npcol.GT.1 .AND. dir.EQ.1 ) THEN

                           DO 390 indx = 1, ilen

                              work( buffer+indx-1 ) =

     $                             float( iwork(ipiw+indx-1) )

 390                       CONTINUE

                           CALL slamov( 'All', dlen, 1, work( ipw3 ),

     $                          dlen, work(buffer+ilen), dlen )

                           CALL sgebs2d( ictxt, 'Row', top, bufflen,

     $                          1, work(buffer), bufflen )

                        END IF

                     ELSEIF( myrow.EQ.rsrc4 .AND. dir.EQ.1 .AND.

     $                    npcol.GT.1 ) THEN

                        buffer = pdtraf

                        bufflen = dlen + ilen

                        CALL sgebr2d( ictxt, 'Row', top, bufflen,

     $                       1, work(buffer), bufflen, rsrc4, csrc4 )

                        DO 400 indx = 1, ilen

                           iwork(ipiw+indx-1) =

     $                          int( work( buffer+indx-1 ) )

 400                    CONTINUE

                        CALL slamov( 'All', dlen, 1,

     $                       work( buffer+ilen ), dlen,

     $                       work( ipw3 ), dlen )

                     END IF

                  END IF

                  IF( csrc1.NE.csrc4 ) THEN

                     IF( myrow.EQ.rsrc4 .AND. mycol.EQ.csrc4 ) THEN

                        buffer = pdtraf

                        bufflen = dlen + ilen

                        IF( nprow.GT.1 .AND. dir.EQ.2 ) THEN

                           DO 395 indx = 1, ilen

                              work( buffer+indx-1 ) =

     $                             float( iwork(ipiw+indx-1) )

 395                       CONTINUE

                           CALL slamov( 'All', dlen, 1, work( ipw3 ),

     $                          dlen, work(buffer+ilen), dlen )

                           CALL sgebs2d( ictxt, 'Col', top, bufflen,

     $                          1, work(buffer), bufflen )

                        END IF

                     ELSEIF( mycol.EQ.csrc4 .AND. dir.EQ.2 .AND.

     $                    nprow.GT.1 ) THEN

                        buffer = pdtraf

                        bufflen = dlen + ilen

                        CALL sgebr2d( ictxt, 'Col', top, bufflen, 1,

     $                       work(buffer), bufflen, rsrc4, csrc4 )

                        DO 402 indx = 1, ilen

                           iwork(ipiw+indx-1) =

     $                          int( work( buffer+indx-1 ) )

 402                    CONTINUE

                        CALL slamov( 'All', dlen, 1,

     $                       work( buffer+ilen ), dlen,

     $                       work( ipw3 ), dlen )

                     END IF

                  END IF

*

 326              CONTINUE

*

 321           CONTINUE

*

*              Compute crossborder updates.

*

               DO 322 window = window0, wine, 2

                  IF( window.EQ.1 .AND. skip1cr ) GO TO 327

                  rsrc4 = iwork(irsrc+window-1)

                  csrc4 = iwork(icsrc+window-1)

                  rsrc1 = mod( rsrc4 - 1 + nprow, nprow )

                  csrc1 = mod( csrc4 - 1 + npcol, npcol )

*

*                 Prepare workspaces for updates:

*                   IPW3 now holds the orthogonal transformations

*                   IPW4 holds the explicit orthogonal matrix, if formed

*                   IPW5 holds the crossborder block column of T

*                   IPW6 holds the crossborder block row of T

*                   IPW7 holds the crossborder block column of Q

*                        (if WANTQ=.TRUE.)

*                   IPW8 points to the leftover workspace used as the

*                        left-hand side (result) of the matrix products

*

                  IF( ((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)

     $                 .OR. ((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.

     $                 dir.EQ.1)) THEN

                     ipw4 = buffer

                     IF( dir.EQ.2 ) THEN

                        IF( wantq ) THEN

                           qrows = numroc( n, nb, myrow, descq( rsrc_ ),

     $                          nprow )

                        ELSE

                           qrows = 0

                        END IF

                        trows = numroc( i-1, nb, myrow, desct( rsrc_ ),

     $                       nprow )

                     ELSE

                        qrows = 0

                        trows = 0

                     END IF

                     IF( dir.EQ.1 ) THEN

                        tcols = numroc( n - (i+dim1-1), nb, mycol,

     $                       csrc4, npcol )

                        IF( mycol.EQ.csrc4 ) tcols = tcols - dim4

                     ELSE

                        tcols = 0

                     END IF

                     ipw5 = ipw4 + nwin*nwin

                     ipw6 = ipw5 + trows * nwin

                     IF( wantq ) THEN

                        ipw7 = ipw6 + nwin * tcols

                        ipw8 = ipw7 + qrows * nwin

                     ELSE

                        ipw8 = ipw6 + nwin * tcols

                     END IF

                  END IF

*

*                 Let each process row and column involved in the updates

*                 exchange data in T and Q with their neighbours.

*

                  IF( dir.EQ.2 ) THEN

                     IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN

                        DO 410 indx = 1, nprow

                           IF( mycol.EQ.csrc1 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, i, desct,

     $                             nprow, npcol, myrow, mycol, iloc,

     $                             jloc1, rsrc, csrc1 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', trows, dim1,

     $                                t((jloc1-1)*lldt+iloc), lldt,

     $                                work(ipw5), trows )

                                 IF( npcol.GT.1 ) THEN

                                    east = mod( mycol + 1, npcol )

                                    CALL sgesd2d( ictxt, trows, dim1,

     $                                   work(ipw5), trows, rsrc,

     $                                   east )

                                    CALL sgerv2d( ictxt, trows, dim4,

     $                                   work(ipw5+trows*dim1), trows,

     $                                   rsrc, east )

                                 END IF

                              END IF

                           END IF

                           IF( mycol.EQ.csrc4 ) THEN

                              CALL infog2l( 1+(indx-1)*nb, i+dim1,

     $                             desct, nprow, npcol, myrow, mycol,

     $                             iloc, jloc4, rsrc, csrc4 )

                              IF( myrow.EQ.rsrc ) THEN

                                 CALL slamov( 'All', trows, dim4,

     $                                t((jloc4-1)*lldt+iloc), lldt,

     $                                work(ipw5+trows*dim1), trows )

                                 IF( npcol.GT.1 ) THEN

                                    west = mod( mycol-1+npcol, npcol )

                                    CALL sgesd2d( ictxt, trows, dim4,

     $                                   work(ipw5+trows*dim1), trows,

     $                                   rsrc, west )

                                    CALL sgerv2d( ictxt, trows, dim1,

     $                                   work(ipw5), trows, rsrc,

     $                                   west )

                                 END IF

                              END IF

                           END IF

 410                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.1 ) THEN

                     IF( myrow.EQ.rsrc1 .OR. myrow.EQ.rsrc4 ) THEN

                        DO 420 indx = 1, npcol

                           IF( myrow.EQ.rsrc1 ) THEN

                              IF( indx.EQ.1 ) THEN

                                 CALL infog2l( i, lihic+1, desct, nprow,

     $                                npcol, myrow, mycol, iloc1, jloc,

     $                                rsrc1, csrc )

                              ELSE

                                 CALL infog2l( i,

     $                                (iceil(lihic,nb)+(indx-2))*nb+1,

     $                                desct, nprow, npcol, myrow, mycol,

     $                                iloc1, jloc, rsrc1, csrc )

                              END IF

                              IF( mycol.EQ.csrc ) THEN

                                 CALL slamov( 'All', dim1, tcols,

     $                                t((jloc-1)*lldt+iloc1), lldt,

     $                                work(ipw6), nwin )

                                 IF( nprow.GT.1 ) THEN

                                    south = mod( myrow + 1, nprow )

                                    CALL sgesd2d( ictxt, dim1, tcols,

     $                                   work(ipw6), nwin, south,

     $                                   csrc )

                                    CALL sgerv2d( ictxt, dim4, tcols,

     $                                   work(ipw6+dim1), nwin, south,

     $                                   csrc )

                                 END IF

                              END IF

                           END IF

                           IF( myrow.EQ.rsrc4 ) THEN

                              IF( indx.EQ.1 ) THEN

                                 CALL infog2l( i+dim1, lihic+1, desct,

     $                                nprow, npcol, myrow, mycol, iloc4,

     $                                jloc, rsrc4, csrc )

                              ELSE

                                 CALL infog2l( i+dim1,

     $                                (iceil(lihic,nb)+(indx-2))*nb+1,

     $                                desct, nprow, npcol, myrow, mycol,

     $                                iloc4, jloc, rsrc4, csrc )

                              END IF

                              IF( mycol.EQ.csrc ) THEN

                                 CALL slamov( 'All', dim4, tcols,

     $                                t((jloc-1)*lldt+iloc4), lldt,

     $                                work(ipw6+dim1), nwin )

                                 IF( nprow.GT.1 ) THEN

                                    north = mod( myrow-1+nprow, nprow )

                                    CALL sgesd2d( ictxt, dim4, tcols,

     $                                   work(ipw6+dim1), nwin, north,

     $                                   csrc )

                                    CALL sgerv2d( ictxt, dim1, tcols,

     $                                   work(ipw6), nwin, north,

     $                                   csrc )

                                 END IF

                              END IF

                           END IF

 420                    CONTINUE

                     END IF

                  END IF

*

                  IF( dir.EQ.2 ) THEN

                     IF( wantq ) THEN

                        IF( mycol.EQ.csrc1 .OR. mycol.EQ.csrc4 ) THEN

                           DO 430 indx = 1, nprow

                              IF( mycol.EQ.csrc1 ) THEN

                                 CALL infog2l( 1+(indx-1)*nb, i, descq,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc1, rsrc, csrc1 )

                                 IF( myrow.EQ.rsrc ) THEN

                                    CALL slamov( 'All', qrows, dim1,

     $                                   q((jloc1-1)*lldq+iloc), lldq,

     $                                   work(ipw7), qrows )

                                    IF( npcol.GT.1 ) THEN

                                       east = mod( mycol + 1, npcol )

                                       CALL sgesd2d( ictxt, qrows, dim1,

     $                                      work(ipw7), qrows, rsrc,

     $                                      east )

                                       CALL sgerv2d( ictxt, qrows, dim4,

     $                                      work(ipw7+qrows*dim1),

     $                                      qrows, rsrc, east )

                                    END IF

                                 END IF

                              END IF

                              IF( mycol.EQ.csrc4 ) THEN

                                 CALL infog2l( 1+(indx-1)*nb, i+dim1,

     $                                descq, nprow, npcol, myrow, mycol,

     $                                iloc, jloc4, rsrc, csrc4 )

                                 IF( myrow.EQ.rsrc ) THEN

                                    CALL slamov( 'All', qrows, dim4,

     $                                   q((jloc4-1)*lldq+iloc), lldq,

     $                                   work(ipw7+qrows*dim1), qrows )

                                    IF( npcol.GT.1 ) THEN

                                       west = mod( mycol-1+npcol,

     $                                      npcol )

                                       CALL sgesd2d( ictxt, qrows, dim4,

     $                                      work(ipw7+qrows*dim1),

     $                                      qrows, rsrc, west )

                                       CALL sgerv2d( ictxt, qrows, dim1,

     $                                      work(ipw7), qrows, rsrc,

     $                                      west )

                                    END IF

                                 END IF

                              END IF

 430                       CONTINUE

                        END IF

                     END IF

                  END IF

*

 327              CONTINUE

*

 322           CONTINUE

*

               DO 323 window = window0, wine, 2

                  rsrc4 = iwork(irsrc+window-1)

                  csrc4 = iwork(icsrc+window-1)

                  rsrc1 = mod( rsrc4 - 1 + nprow, nprow )

                  csrc1 = mod( csrc4 - 1 + npcol, npcol )

                  flops = 0

                  IF( ((mycol.EQ.csrc1.OR.mycol.EQ.csrc4).AND.dir.EQ.2)

     $                 .OR. ((myrow.EQ.rsrc1.OR.myrow.EQ.rsrc4).AND.

     $                 dir.EQ.1) ) THEN

*

*                    Skip this part of the updates if appropriate.

*

                     IF( window.EQ.1 .AND. skip1cr ) GO TO 328

*

*                    Count number of operations to decide whether to use

*                    matrix-matrix multiplications for updating

*                    off-diagonal parts or not.

*

                     nitraf = pitraf - ipiw

                     ishh = .false.

                     DO 405 k = 1, nitraf

                        IF( iwork( ipiw + k - 1 ).LE.nwin ) THEN

                           flops = flops + 6

                        ELSE

                           flops = flops + 11

                           ishh = .true.

                        END IF

 405                 CONTINUE

*

*                    Perform updates in parallel.

*

                     IF( flops.NE.0 .AND.

     $                    ( 2*flops*100 )/( 2*nwin*nwin ) .GE. mmult )

     $                    THEN

*

                        CALL slaset( 'All', nwin, nwin, zero, one,

     $                       work( ipw4 ), nwin )

                        work(ipw8) = float(myrow)

                        work(ipw8+1) = float(mycol)

                        CALL bslaapp( 1, nwin, nwin, ncb, work( ipw4 ),

     $                       nwin, nitraf, iwork(ipiw), work( ipw3 ),

     $                       work(ipw8) )

*

*                       Test if sparsity structure of orthogonal matrix

*                       can be exploited (see below).

*

                        IF( ishh .OR. dim1.NE.ks .OR. dim4.NE.ks ) THEN

*

*                          Update the columns of T and Q affected by the

*                          reordering.

*

                           IF( dir.EQ.2 ) THEN

                              DO 440 indx = 1, min(i-1,1+(nprow-1)*nb),

     $                             nb

                                 IF( mycol.EQ.csrc1 ) THEN

                                    CALL infog2l( indx, i, desct, nprow,

     $                                   npcol, myrow, mycol, iloc,

     $                                   jloc, rsrc, csrc1 )

                                    IF( myrow.EQ.rsrc ) THEN

                                       CALL sgemm( 'No transpose',

     $                                      'No transpose', trows, dim1,

     $                                      nwin, one, work( ipw5 ),

     $                                      trows, work( ipw4 ), nwin,

     $                                      zero, work(ipw8), trows )

                                       CALL slamov( 'All', trows, dim1,

     $                                      work(ipw8), trows,

     $                                      t((jloc-1)*lldt+iloc),

     $                                      lldt )

                                    END IF

                                 END IF

                                 IF( mycol.EQ.csrc4 ) THEN

                                    CALL infog2l( indx, i+dim1, desct,

     $                                   nprow, npcol, myrow, mycol,

     $                                   iloc, jloc, rsrc, csrc4 )

                                    IF( myrow.EQ.rsrc ) THEN

                                       CALL sgemm( 'No transpose',

     $                                      'No transpose', trows, dim4,

     $                                      nwin, one, work( ipw5 ),

     $                                      trows,

     $                                      work( ipw4+nwin*dim1 ),

     $                                      nwin, zero, work(ipw8),

     $                                      trows )

                                       CALL slamov( 'All', trows, dim4,

     $                                      work(ipw8), trows,

     $                                      t((jloc-1)*lldt+iloc),

     $                                      lldt )

                                    END IF

                                 END IF

 440                          CONTINUE

*

                              IF( wantq ) THEN

                                 DO 450 indx = 1, min(n,1+(nprow-1)*nb),

     $                                nb

                                    IF( mycol.EQ.csrc1 ) THEN

                                       CALL infog2l( indx, i, descq,

     $                                      nprow, npcol, myrow, mycol,

     $                                      iloc, jloc, rsrc, csrc1 )

                                       IF( myrow.EQ.rsrc ) THEN

                                          CALL sgemm( 'No transpose',

     $                                         'No transpose', qrows,

     $                                         dim1, nwin, one,

     $                                         work( ipw7 ), qrows,

     $                                         work( ipw4 ), nwin,

     $                                         zero, work(ipw8),

     $                                         qrows )

                                          CALL slamov( 'All', qrows,

     $                                         dim1, work(ipw8), qrows,

     $                                         q((jloc-1)*lldq+iloc),

     $                                         lldq )

                                       END IF

                                    END IF

                                    IF( mycol.EQ.csrc4 ) THEN

                                       CALL infog2l( indx, i+dim1,

     $                                      descq, nprow, npcol, myrow,

     $                                      mycol, iloc, jloc, rsrc,

     $                                      csrc4 )

                                       IF( myrow.EQ.rsrc ) THEN

                                          CALL sgemm( 'No transpose',

     $                                         'No transpose', qrows,

     $                                         dim4, nwin, one,

     $                                         work( ipw7 ), qrows,

     $                                         work( ipw4+nwin*dim1 ),

     $                                         nwin, zero, work(ipw8),

     $                                         qrows )

                                          CALL slamov( 'All', qrows,

     $                                         dim4, work(ipw8), qrows,

     $                                         q((jloc-1)*lldq+iloc),

     $                                         lldq )

                                       END IF

                                    END IF

 450                             CONTINUE

                              END IF

                           END IF

*

*                          Update the rows of T affected by the

*                          reordering.

*

                           IF( dir.EQ.1 ) THEN

                              IF ( lihic.LT.n ) THEN

                                 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4

     $                               .AND.mod(lihic,nb).NE.0 ) THEN

                                    indx = lihic + 1

                                    CALL infog2l( i, indx, desct, nprow,

     $                                   npcol, myrow, mycol, iloc,

     $                                   jloc, rsrc1, csrc4 )

                                    CALL sgemm( 'Transpose',

     $                                   'No Transpose', dim1, tcols,

     $                                   nwin, one, work(ipw4), nwin,

     $                                   work( ipw6 ), nwin, zero,

     $                                   work(ipw8), dim1 )

                                    CALL slamov( 'All', dim1, tcols,

     $                                   work(ipw8), dim1,

     $                                   t((jloc-1)*lldt+iloc), lldt )

                                 END IF

                                 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4

     $                               .AND.mod(lihic,nb).NE.0 ) THEN

                                    indx = lihic + 1

                                    CALL infog2l( i+dim1, indx, desct,

     $                                   nprow, npcol, myrow, mycol,

     $                                   iloc, jloc, rsrc4, csrc4 )

                                    CALL sgemm( 'Transpose',

     $                                  'No Transpose', dim4, tcols,

     $                                   nwin, one,

     $                                   work( ipw4+dim1*nwin ), nwin,

     $                                   work( ipw6), nwin, zero,

     $                                   work(ipw8), dim4 )

                                    CALL slamov( 'All', dim4, tcols,

     $                                   work(ipw8), dim4,

     $                                   t((jloc-1)*lldt+iloc), lldt )

                                 END IF

                                 indxs = iceil(lihic,nb)*nb + 1

                                 indxe = min(n,indxs+(npcol-2)*nb)

                                 DO 460 indx = indxs, indxe, nb

                                    IF( myrow.EQ.rsrc1 ) THEN

                                       CALL infog2l( i, indx, desct,

     $                                      nprow, npcol, myrow, mycol,

     $                                      iloc, jloc, rsrc1, csrc )

                                       IF( mycol.EQ.csrc ) THEN

                                          CALL sgemm( 'Transpose',

     $                                         'No Transpose', dim1,

     $                                         tcols, nwin, one,

     $                                         work( ipw4 ), nwin,

     $                                         work( ipw6 ), nwin,

     $                                         zero, work(ipw8), dim1 )

                                          CALL slamov( 'All', dim1,

     $                                         tcols, work(ipw8), dim1,

     $                                         t((jloc-1)*lldt+iloc),

     $                                         lldt )

                                       END IF

                                    END IF

                                    IF( myrow.EQ.rsrc4 ) THEN

                                       CALL infog2l( i+dim1, indx,

     $                                      desct, nprow, npcol, myrow,

     $                                      mycol, iloc, jloc, rsrc4,

     $                                      csrc )

                                       IF( mycol.EQ.csrc ) THEN

                                          CALL sgemm( 'Transpose',

     $                                         'No Transpose', dim4,

     $                                         tcols, nwin, one,

     $                                         work( ipw4+nwin*dim1 ),

     $                                         nwin, work( ipw6 ),

     $                                         nwin, zero, work(ipw8),

     $                                         dim4 )

                                          CALL slamov( 'All', dim4,

     $                                         tcols, work(ipw8), dim4,

     $                                         t((jloc-1)*lldt+iloc),

     $                                         lldt )

                                       END IF

                                    END IF

 460                             CONTINUE

                              END IF

                           END IF

                        ELSE

*

*                          The NWIN-by-NWIN matrix U containing the

*                          accumulated orthogonal transformations has

*                          the following structure:

*

*                                        [ U11  U12 ]

*                                    U = [          ],

*                                        [ U21  U22 ]

*

*                          where U21 is KS-by-KS upper triangular and

*                          U12 is (NWIN-KS)-by-(NWIN-KS) lower

*                          triangular. For reordering over the border

*                          the structure is only exploited when the

*                          border cuts the columns of U conformally with

*                          the structure itself. This happens exactly

*                          when all eigenvalues in the subcluster were

*                          moved to the other side of the border and

*                          fit perfectly in their new positions, i.e.,

*                          the reordering stops when the last eigenvalue

*                          to cross the border is reordered to the

*                          position closest to the border. Tested by

*                          checking if KS = DIM1 = DIM4 (see above).

*                          This should hold quite often. But this branch

*                          is entered only if all involved eigenvalues

*                          are real.

*

*                          Update the columns of T and Q affected by the

*                          reordering.

*

*                          Compute T2*U21 + T1*U11 on the left side of

*                          the border.

*

                           IF( dir.EQ.2 ) THEN

                              indxe = min(i-1,1+(nprow-1)*nb)

                              DO 470 indx = 1, indxe, nb

                                 IF( mycol.EQ.csrc1 ) THEN

                                    CALL infog2l( indx, i, desct, nprow,

     $                                   npcol, myrow, mycol, iloc,

     $                                   jloc, rsrc, csrc1 )

                                    IF( myrow.EQ.rsrc ) THEN

                                       CALL slamov( 'All', trows, ks,

     $                                      work( ipw5+trows*dim4),

     $                                      trows, work(ipw8), trows )

                                       CALL strmm( 'Right', 'Upper',

     $                                      'No transpose',

     $                                      'Non-unit', trows, ks,

     $                                      one, work( ipw4+dim4 ),

     $                                      nwin, work(ipw8), trows )

                                       CALL sgemm( 'No transpose',

     $                                      'No transpose', trows, ks,

     $                                      dim4, one, work( ipw5 ),

     $                                      trows, work( ipw4 ), nwin,

     $                                      one, work(ipw8), trows )

                                       CALL slamov( 'All', trows, ks,

     $                                      work(ipw8), trows,

     $                                      t((jloc-1)*lldt+iloc),

     $                                      lldt )

                                    END IF

                                 END IF

*

*                                Compute T1*U12 + T2*U22 on the right

*                                side of the border.

*

                                 IF( mycol.EQ.csrc4 ) THEN

                                    CALL infog2l( indx, i+dim1, desct,

     $                                   nprow, npcol, myrow, mycol,

     $                                   iloc, jloc, rsrc, csrc4 )

                                    IF( myrow.EQ.rsrc ) THEN

                                       CALL slamov( 'All', trows, dim4,

     $                                      work(ipw5), trows,

     $                                      work( ipw8 ), trows )

                                       CALL strmm( 'Right', 'Lower',

     $                                      'No transpose',

     $                                      'Non-unit', trows, dim4,

     $                                      one, work( ipw4+nwin*ks ),

     $                                      nwin, work( ipw8 ), trows )

                                       CALL sgemm( 'No transpose',

     $                                      'No transpose', trows, dim4,

     $                                      ks, one,

     $                                      work( ipw5+trows*dim4),

     $                                      trows,

     $                                      work( ipw4+nwin*ks+dim4 ),

     $                                      nwin, one, work( ipw8 ),

     $                                      trows )

                                       CALL slamov( 'All', trows, dim4,

     $                                      work(ipw8), trows,

     $                                      t((jloc-1)*lldt+iloc),

     $                                      lldt )

                                    END IF

                                 END IF

 470                          CONTINUE

                              IF( wantq ) THEN

*

*                                Compute Q2*U21 + Q1*U11 on the left

*                                side of border.

*

                                 indxe = min(n,1+(nprow-1)*nb)

                                 DO 480 indx = 1, indxe, nb

                                    IF( mycol.EQ.csrc1 ) THEN

                                       CALL infog2l( indx, i, descq,

     $                                      nprow, npcol, myrow, mycol,

     $                                      iloc, jloc, rsrc, csrc1 )

                                       IF( myrow.EQ.rsrc ) THEN

                                          CALL slamov( 'All', qrows, ks,

     $                                         work( ipw7+qrows*dim4),

     $                                         qrows, work(ipw8),

     $                                         qrows )

                                          CALL strmm( 'Right', 'Upper',

     $                                         'No transpose',

     $                                         'Non-unit', qrows,

     $                                         ks, one,

     $                                         work( ipw4+dim4 ), nwin,

     $                                         work(ipw8), qrows )

                                          CALL sgemm( 'No transpose',

     $                                         'No transpose', qrows,

     $                                         ks, dim4, one,

     $                                         work( ipw7 ), qrows,

     $                                         work( ipw4 ), nwin, one,

     $                                         work(ipw8), qrows )

                                          CALL slamov( 'All', qrows, ks,

     $                                         work(ipw8), qrows,

     $                                         q((jloc-1)*lldq+iloc),

     $                                         lldq )

                                       END IF

                                    END IF

*

*                                   Compute Q1*U12 + Q2*U22 on the right

*                                   side of border.

*

                                    IF( mycol.EQ.csrc4 ) THEN

                                       CALL infog2l( indx, i+dim1,

     $                                      descq, nprow, npcol, myrow,

     $                                      mycol, iloc, jloc, rsrc,

     $                                      csrc4 )

                                       IF( myrow.EQ.rsrc ) THEN

                                          CALL slamov( 'All', qrows,

     $                                         dim4, work(ipw7), qrows,

     $                                         work( ipw8 ), qrows )

                                          CALL strmm( 'Right', 'Lower',

     $                                         'No transpose',

     $                                         'Non-unit', qrows,

     $                                         dim4, one,

     $                                         work( ipw4+nwin*ks ),

     $                                         nwin, work( ipw8 ),

     $                                         qrows )

                                          CALL sgemm( 'No transpose',

     $                                         'No transpose', qrows,

     $                                         dim4, ks, one,

     $                                         work(ipw7+qrows*(dim4)),

     $                                         qrows,

     $                                         work(ipw4+nwin*ks+dim4),

     $                                         nwin, one, work( ipw8 ),

     $                                         qrows )

                                          CALL slamov( 'All', qrows,

     $                                         dim4, work(ipw8), qrows,

     $                                         q((jloc-1)*lldq+iloc),

     $                                         lldq )

                                       END IF

                                    END IF

 480                             CONTINUE

                              END IF

                           END IF

*

                           IF( dir.EQ.1 ) THEN

                              IF ( lihic.LT.n ) THEN

*

*                                Compute U21**T*T2 + U11**T*T1 on the

*                                upper side of the border.

*

                                 IF( myrow.EQ.rsrc1.AND.mycol.EQ.csrc4

     $                               .AND.mod(lihic,nb).NE.0 ) THEN

                                    indx = lihic + 1

                                    CALL infog2l( i, indx, desct, nprow,

     $                                   npcol, myrow, mycol, iloc,

     $                                   jloc, rsrc1, csrc4 )

                                    CALL slamov( 'All', ks, tcols,

     $                                   work( ipw6+dim4 ), nwin,

     $                                   work(ipw8), ks )

                                    CALL strmm( 'Left', 'Upper',

     $                                   'Transpose', 'Non-unit',

     $                                   ks, tcols, one,

     $                                   work( ipw4+dim4 ), nwin,

     $                                   work(ipw8), ks )

                                    CALL sgemm( 'Transpose',

     $                                   'No transpose', ks, tcols,

     $                                   dim4, one, work(ipw4), nwin,

     $                                   work(ipw6), nwin, one,

     $                                   work(ipw8), ks )

                                    CALL slamov( 'All', ks, tcols,

     $                                   work(ipw8), ks,

     $                                   t((jloc-1)*lldt+iloc), lldt )

                                 END IF

*

*                                Compute U12**T*T1 + U22**T*T2 on the

*                                lower side of the border.

*

                                 IF( myrow.EQ.rsrc4.AND.mycol.EQ.csrc4

     $                               .AND.mod(lihic,nb).NE.0 ) THEN

                                    indx = lihic + 1

                                    CALL infog2l( i+dim1, indx, desct,

     $                                   nprow, npcol, myrow, mycol,

     $                                   iloc, jloc, rsrc4, csrc4 )

                                    CALL slamov( 'All', dim4, tcols,

     $                                   work( ipw6 ), nwin,

     $                                   work( ipw8 ), dim4 )

                                    CALL strmm( 'Left', 'Lower',

     $                                   'Transpose', 'Non-unit',

     $                                   dim4, tcols, one,

     $                                   work( ipw4+nwin*ks ), nwin,

     $                                   work( ipw8 ), dim4 )

                                    CALL sgemm( 'Transpose',

     $                                   'No Transpose', dim4, tcols,

     $                                   ks, one,

     $                                   work( ipw4+nwin*ks+dim4 ),

     $                                   nwin, work( ipw6+dim1 ), nwin,

     $                                   one, work( ipw8), dim4 )

                                    CALL slamov( 'All', dim4, tcols,

     $                                   work(ipw8), dim4,

     $                                   t((jloc-1)*lldt+iloc), lldt )

                                 END IF

*

*                                Compute U21**T*T2 + U11**T*T1 on the

*                                upper side of the border.

*

                                 indxs = iceil(lihic,nb)*nb+1

                                 indxe = min(n,indxs+(npcol-2)*nb)

                                 DO 490 indx = indxs, indxe, nb

                                    IF( myrow.EQ.rsrc1 ) THEN

                                       CALL infog2l( i, indx, desct,

     $                                      nprow, npcol, myrow, mycol,

     $                                      iloc, jloc, rsrc1, csrc )

                                       IF( mycol.EQ.csrc ) THEN

                                          CALL slamov( 'All', ks, tcols,

     $                                         work( ipw6+dim4 ), nwin,

     $                                         work(ipw8), ks )

                                          CALL strmm( 'Left', 'Upper',

     $                                         'Transpose',

     $                                         'Non-unit', ks,

     $                                         tcols, one,

     $                                         work( ipw4+dim4 ), nwin,

     $                                         work(ipw8), ks )

                                          CALL sgemm( 'Transpose',

     $                                         'No transpose', ks,

     $                                         tcols, dim4, one,

     $                                         work(ipw4), nwin,

     $                                         work(ipw6), nwin, one,

     $                                         work(ipw8), ks )

                                          CALL slamov( 'All', ks, tcols,

     $                                         work(ipw8), ks,

     $                                         t((jloc-1)*lldt+iloc),

     $                                         lldt )

                                       END IF

                                    END IF

*

*                                   Compute U12**T*T1 + U22**T*T2 on

*                                   lower side of border.

*

                                    IF( myrow.EQ.rsrc4 ) THEN

                                       CALL infog2l( i+dim1, indx,

     $                                      desct, nprow, npcol, myrow,

     $                                      mycol, iloc, jloc, rsrc4,

     $                                      csrc )

                                       IF( mycol.EQ.csrc ) THEN

                                          CALL slamov( 'All', dim4,

     $                                         tcols, work( ipw6 ),

     $                                         nwin, work( ipw8 ),

     $                                         dim4 )

                                          CALL strmm( 'Left', 'Lower',

     $                                         'Transpose',

     $                                         'Non-unit', dim4,

     $                                         tcols, one,

     $                                         work( ipw4+nwin*ks ),

     $                                         nwin, work( ipw8 ),

     $                                         dim4 )

                                          CALL sgemm( 'Transpose',

     $                                         'No Transpose', dim4,

     $                                         tcols, ks, one,

     $                                         work(ipw4+nwin*ks+dim4),

     $                                         nwin, work( ipw6+dim1 ),

     $                                         nwin, one, work( ipw8),

     $                                         dim4 )

                                          CALL slamov( 'All', dim4,

     $                                         tcols, work(ipw8), dim4,

     $                                         t((jloc-1)*lldt+iloc),

     $                                         lldt )

                                       END IF

                                    END IF

 490                             CONTINUE

                              END IF

                           END IF

                        END IF

                     ELSEIF( flops.NE.0 ) THEN

*

*                       Update off-diagonal blocks and Q using the

*                       pipelined elementary transformations. Now we

*                       have a delicate problem: how to do this without

*                       redundant work? For now, we let the processes

*                       involved compute the whole crossborder block

*                       rows and column saving only the part belonging

*                       to the corresponding side of the border. To make

*                       this a realistic alternative, we have modified

*                       the ratio r_flops (see Reference [2] above) to

*                       give more favor to the ordinary matrix

*                       multiplication.

*

                        IF( dir.EQ.2 ) THEN

                           indxe =  min(i-1,1+(nprow-1)*nb)

                           DO 500 indx = 1, indxe, nb

                              CALL infog2l( indx, i, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc )

                              IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )

     $                             THEN

                                 CALL bslaapp( 1, trows, nwin, ncb,

     $                                work(ipw5), trows, nitraf,

     $                                iwork(ipiw), work( ipw3 ),

     $                                work(ipw8) )

                                 CALL slamov( 'All', trows, dim1,

     $                                work(ipw5), trows,

     $                                t((jloc-1)*lldt+iloc ), lldt )

                              END IF

                              CALL infog2l( indx, i+dim1, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc )

                              IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )

     $                             THEN

                                 IF( npcol.GT.1 )

     $                                CALL bslaapp( 1, trows, nwin, ncb,

     $                                work(ipw5), trows, nitraf,

     $                                iwork(ipiw), work( ipw3 ),

     $                                work(ipw8) )

                                 CALL slamov( 'All', trows, dim4,

     $                                work(ipw5+trows*dim1), trows,

     $                                t((jloc-1)*lldt+iloc ), lldt )

                              END IF

 500                       CONTINUE

                           IF( wantq ) THEN

                              indxe = min(n,1+(nprow-1)*nb)

                              DO 510 indx = 1, indxe, nb

                                 CALL infog2l( indx, i, descq, nprow,

     $                                npcol, myrow, mycol, iloc, jloc,

     $                                rsrc, csrc )

                                 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )

     $                                THEN

                                    CALL bslaapp( 1, qrows, nwin, ncb,

     $                                   work(ipw7), qrows, nitraf,

     $                                   iwork(ipiw), work( ipw3 ),

     $                                   work(ipw8) )

                                    CALL slamov( 'All', qrows, dim1,

     $                                   work(ipw7), qrows,

     $                                   q((jloc-1)*lldq+iloc ), lldq )

                                 END IF

                                 CALL infog2l( indx, i+dim1, descq,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc, csrc )

                                 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )

     $                                THEN

                                    IF( npcol.GT.1 )

     $                                   CALL bslaapp( 1, qrows, nwin,

     $                                   ncb, work(ipw7), qrows,

     $                                   nitraf, iwork(ipiw),

     $                                   work( ipw3 ), work(ipw8) )

                                    CALL slamov( 'All', qrows, dim4,

     $                                   work(ipw7+qrows*dim1), qrows,

     $                                   q((jloc-1)*lldq+iloc ), lldq )

                                 END IF

 510                          CONTINUE

                           END IF

                        END IF

*

                        IF( dir.EQ.1 ) THEN

                           IF( lihic.LT.n ) THEN

                              indx = lihic + 1

                              CALL infog2l( i, indx, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc )

                              IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc.AND.

     $                            mod(lihic,nb).NE.0 ) THEN

                                 CALL bslaapp( 0, nwin, tcols, ncb,

     $                                work( ipw6 ), nwin, nitraf,

     $                                iwork(ipiw), work( ipw3 ),

     $                                work(ipw8) )

                                 CALL slamov( 'All', dim1, tcols,

     $                                work( ipw6 ), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt )

                              END IF

                              CALL infog2l( i+dim1, indx, desct, nprow,

     $                             npcol, myrow, mycol, iloc, jloc,

     $                             rsrc, csrc )

                              IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc.AND.

     $                             mod(lihic,nb).NE.0 ) THEN

                                 IF( nprow.GT.1 )

     $                                CALL bslaapp( 0, nwin, tcols, ncb,

     $                                work( ipw6 ), nwin, nitraf,

     $                                iwork(ipiw), work( ipw3 ),

     $                                work(ipw8) )

                                 CALL slamov( 'All', dim4, tcols,

     $                                work( ipw6+dim1 ), nwin,

     $                                t((jloc-1)*lldt+iloc), lldt )

                              END IF

                              indxs = iceil(lihic,nb)*nb + 1

                              indxe = min(n,indxs+(npcol-2)*nb)

                              DO 520 indx = indxs, indxe, nb

                                 CALL infog2l( i, indx, desct, nprow,

     $                                npcol, myrow, mycol, iloc, jloc,

     $                                rsrc, csrc )

                                 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )

     $                                THEN

                                    CALL bslaapp( 0, nwin, tcols, ncb,

     $                                   work(ipw6), nwin, nitraf,

     $                                   iwork(ipiw), work( ipw3 ),

     $                                   work(ipw8) )

                                    CALL slamov( 'All', dim1, tcols,

     $                                   work( ipw6 ), nwin,

     $                                   t((jloc-1)*lldt+iloc), lldt )

                                 END IF

                                 CALL infog2l( i+dim1, indx, desct,

     $                                nprow, npcol, myrow, mycol, iloc,

     $                                jloc, rsrc, csrc )

                                 IF( myrow.EQ.rsrc .AND. mycol.EQ.csrc )

     $                                THEN

                                    IF( nprow.GT.1 )

     $                                   CALL bslaapp( 0, nwin, tcols,

     $                                   ncb, work(ipw6), nwin, nitraf,

     $                                   iwork(ipiw), work( ipw3 ),

     $                                   work(ipw8) )

                                    CALL slamov( 'All', dim4, tcols,

     $                                   work( ipw6+dim1 ), nwin,

     $                                   t((jloc-1)*lldt+iloc), lldt )

                                 END IF

 520                          CONTINUE

                           END IF

                        END IF

                     END IF

                  END IF

*

 328              CONTINUE

*

 323           CONTINUE

*

*              End of loops over directions (DIR).

*

 2222       CONTINUE

*

*           End of loops over diagonal blocks for reordering over the

*           block diagonal.

*

 310     CONTINUE

         last = last + 1

         IF( lastwait .AND. last.LT.2 ) GO TO 308

*

*        Barrier to collect the processes before proceeding.

*

         CALL blacs_barrier( ictxt, 'All' )

*

*        Compute global maximum of IERR so that we know if some process

*        experienced a failure in the reordering.

*

         myierr = ierr

         IF( nprocs.GT.1 ) THEN

            CALL igamx2d( ictxt, 'All', top, 1, 1, ierr, 1, -1,

     $           -1, -1, -1, -1 )

         END IF

*

         IF( ierr.NE.0 ) THEN

*

*           When calling BDTREXC, the block at position I+KKS-1 failed

*           to swap.

*

            IF( myierr.NE.0 ) info = max(1,i+kks-1)

            IF( nprocs.GT.1 ) THEN

               CALL igamx2d( ictxt, 'All', top, 1, 1, info, 1, -1,

     $              -1, -1, -1, -1 )

            END IF

            GO TO 300

         END IF

*

*        Do a global update of the SELECT vector.

*

         DO 530 k = 1, n

            rsrc = indxg2p( k, nb, myrow, desct( rsrc_ ), nprow )

            csrc = indxg2p( k, nb, mycol, desct( csrc_ ), npcol )

            IF( myrow.NE.rsrc .OR. mycol.NE.csrc )

     $         SELECT( k ) = 0

 530     CONTINUE

         IF( nprocs.GT.1 )

     $      CALL igsum2d( ictxt, 'All', top, n, 1, SELECT, n, -1, -1 )

*

*        Find the global minimum of ILO and IHI.

*

         ilo = ilo - 1

 523     CONTINUE

         ilo = ilo + 1

         IF( ilo.LE.n ) THEN

            IF( SELECT(ilo).NE.0 ) GO TO 523

         END IF

         ihi = ihi + 1

 527     CONTINUE

         ihi = ihi - 1

         IF( ihi.GE.1 ) THEN

            IF( SELECT(ihi).EQ.0 ) GO TO 527

         END IF

*

*        End While ( ILO <= M )

         GO TO 50

      END IF

*

 300  CONTINUE

*

*     In case an error occurred, do an additional global update of

*     SELECT.

*

      IF( info.NE.0 ) THEN

         DO 540 k = 1, n

            rsrc = indxg2p( k, nb, myrow, desct( rsrc_ ), nprow )

            csrc = indxg2p( k, nb, mycol, desct( csrc_ ), npcol )

            IF( myrow.NE.rsrc .OR. mycol.NE.csrc )

     $           SELECT( k ) = 0

 540     CONTINUE

         IF( nprocs.GT.1 )

     $        CALL igsum2d( ictxt, 'All', top, n, 1, SELECT, n, -1, -1 )

      END IF

*

 545  CONTINUE

*

*     Store the output eigenvalues in WR and WI: first let all the

*     processes compute the eigenvalue inside their diagonal blocks in

*     parallel, except for the eigenvalue located next to a block

*     border. After that, compute all eigenvalues located next to the

*     block borders. Finally, do a global summation over WR and WI so

*     that all processors receive the result. Notice: real eigenvalues

*     extracted from a non-canonical 2-by-2 block are not stored in

*     any particular order.

*

      DO 550 k = 1, n

         wr( k ) = zero

         wi( k ) = zero

 550  CONTINUE

*

*     Loop 560: extract eigenvalues from the blocks which are not laid

*     out across a border of the processor mesh, except for those 1x1

*     blocks on the border.

*

      pair = .false.

      DO 560 k = 1, n

         IF( .NOT. pair ) THEN

            border = ( k.NE.n .AND. mod( k, nb ).EQ.0 ) .OR.

     %           ( k.NE.1 .AND. mod( k, nb ).EQ.1 )

            IF( .NOT. border ) THEN

               CALL infog2l( k, k, desct, nprow, npcol, myrow, mycol,

     $              iloc1, jloc1, trsrc1, tcsrc1 )

               IF( myrow.EQ.trsrc1 .AND. mycol.EQ.tcsrc1 ) THEN

                  elem1 = t((jloc1-1)*lldt+iloc1)

                  IF( k.LT.n ) THEN

                     elem3 = t((jloc1-1)*lldt+iloc1+1)

                  ELSE

                     elem3 = zero

                  END IF

                  IF( elem3.NE.zero ) THEN

                     elem2 = t((jloc1)*lldt+iloc1)

                     elem4 = t((jloc1)*lldt+iloc1+1)

                     CALL slanv2( elem1, elem2, elem3, elem4,

     $                    wr( k ), wi( k ), wr( k+1 ), wi( k+1 ), sn,

     $                    cs )

                     pair = .true.

                  ELSE

                     IF( k.GT.1 ) THEN

                        tmp = t((jloc1-2)*lldt+iloc1)

                        IF( tmp.NE.zero ) THEN

                           elem1 = t((jloc1-2)*lldt+iloc1-1)

                           elem2 = t((jloc1-1)*lldt+iloc1-1)

                           elem3 = t((jloc1-2)*lldt+iloc1)

                           elem4 = t((jloc1-1)*lldt+iloc1)

                           CALL slanv2( elem1, elem2, elem3, elem4,

     $                          wr( k-1 ), wi( k-1 ), wr( k ),

     $                          wi( k ), sn, cs )

                        ELSE

                           wr( k ) = elem1

                        END IF

                     ELSE

                        wr( k ) = elem1

                     END IF

                  END IF

               END IF

            END IF

         ELSE

            pair = .false.

         END IF

 560  CONTINUE

*

*     Loop 570: extract eigenvalues from the blocks which are laid

*     out across a border of the processor mesh. The processors are

*     numbered as below:

*

*                1 | 2

*                --+--

*                3 | 4

*

      DO 570 k = nb, n-1, nb

         CALL infog2l( k, k, desct, nprow, npcol, myrow, mycol,

     $        iloc1, jloc1, trsrc1, tcsrc1 )

         CALL infog2l( k, k+1, desct, nprow, npcol, myrow, mycol,

     $        iloc2, jloc2, trsrc2, tcsrc2 )

         CALL infog2l( k+1, k, desct, nprow, npcol, myrow, mycol,

     $        iloc3, jloc3, trsrc3, tcsrc3 )

         CALL infog2l( k+1, k+1, desct, nprow, npcol, myrow, mycol,

     $        iloc4, jloc4, trsrc4, tcsrc4 )

         IF( myrow.EQ.trsrc2 .AND. mycol.EQ.tcsrc2 ) THEN

            elem2 = t((jloc2-1)*lldt+iloc2)

            IF( trsrc1.NE.trsrc2 .OR. tcsrc1.NE.tcsrc2 )

     $         CALL sgesd2d( ictxt, 1, 1, elem2, 1, trsrc1, tcsrc1 )

         END IF

         IF( myrow.EQ.trsrc3 .AND. mycol.EQ.tcsrc3 ) THEN

            elem3 = t((jloc3-1)*lldt+iloc3)

            IF( trsrc1.NE.trsrc3 .OR. tcsrc1.NE.tcsrc3 )

     $         CALL sgesd2d( ictxt, 1, 1, elem3, 1, trsrc1, tcsrc1 )

         END IF

         IF( myrow.EQ.trsrc4 .AND. mycol.EQ.tcsrc4 ) THEN

            work(1) = t((jloc4-1)*lldt+iloc4)

            IF( k+1.LT.n ) THEN

               work(2) = t((jloc4-1)*lldt+iloc4+1)

            ELSE

               work(2) = zero

            END IF

            IF( trsrc1.NE.trsrc4 .OR. tcsrc1.NE.tcsrc4 )

     $         CALL sgesd2d( ictxt, 2, 1, work, 2, trsrc1, tcsrc1 )

         END IF

         IF( myrow.EQ.trsrc1 .AND. mycol.EQ.tcsrc1 ) THEN

            elem1 = t((jloc1-1)*lldt+iloc1)

            IF( trsrc1.NE.trsrc2 .OR. tcsrc1.NE.tcsrc2 )

     $         CALL sgerv2d( ictxt, 1, 1, elem2, 1, trsrc2, tcsrc2 )

            IF( trsrc1.NE.trsrc3 .OR. tcsrc1.NE.tcsrc3 )

     $         CALL sgerv2d( ictxt, 1, 1, elem3, 1, trsrc3, tcsrc3 )

            IF( trsrc1.NE.trsrc4 .OR. tcsrc1.NE.tcsrc4 )

     $         CALL sgerv2d( ictxt, 2, 1, work, 2, trsrc4, tcsrc4 )

            elem4 = work(1)

            elem5 = work(2)

            IF( elem5.EQ.zero ) THEN

               IF( wr( k ).EQ.zero .AND. wi( k ).EQ.zero ) THEN

                  CALL slanv2( elem1, elem2, elem3, elem4, wr( k ),

     $                 wi( k ), wr( k+1 ), wi( k+1 ), sn, cs )

               ELSEIF( wr( k+1 ).EQ.zero .AND. wi( k+1 ).EQ.zero ) THEN

                  wr( k+1 ) = elem4

               END IF

            ELSEIF( wr( k ).EQ.zero .AND. wi( k ).EQ.zero ) THEN

               wr( k ) = elem1

            END IF

         END IF

 570  CONTINUE

*

      IF( nprocs.GT.1 ) THEN

         CALL sgsum2d( ictxt, 'All', top, n, 1, wr, n, -1, -1 )

         CALL sgsum2d( ictxt, 'All', top, n, 1, wi, n, -1, -1 )

      END IF

*

*     Store storage requirements in workspaces.

*

      work( 1 ) = float(lwmin)

      iwork( 1 ) = liwmin

*

*     Return to calling program.

*

      RETURN

*

*     End of PSTRORD

*


      END

*

bdtrexc
subroutine bdtrexc(n, t, ldt, ifst, ilst, nitraf, itraf, ndtraf, dtraf, work, info)
Definition bdtrexc.f:3

bslaapp
subroutine bslaapp(iside, m, n, nb, a, lda, nitraf, itraf, dtraf, work)
Definition bslaapp.f:3

bstrexc
subroutine bstrexc(n, t, ldt, ifst, ilst, nitraf, itraf, ndtraf, dtraf, work, info)
Definition bstrexc.f:3

chk1mat
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3

ilacpy
subroutine ilacpy(uplo, m, n, a, lda, b, ldb)
Definition ilacpy.f:2

infog2l
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pchk1mat
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3

pchk2mat
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175

pselget
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2

pslacpy
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3

pstrord
subroutine pstrord(compq, select, para, n, t, it, jt, desct, q, iq, jq, descq, wr, wi, m, work, lwork, iwork, liwork, info)
Definition pstrord.f:4

pxerbla
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2