da/dd3/pdlaqr2_8f_source.html

      SUBROUTINE PDLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, A, DESCA,
     $                    ILOZ, IHIZ, Z, DESCZ, NS, ND, SR, SI, T, LDT,
     $                    V, LDV, WR, WI, WORK, LWORK )
*
*     Contribution from the Department of Computing Science and HPC2N,
*     Umea University, Sweden
*
*  -- ScaLAPACK routine (version 2.0.2) --
*     Univ. of Tennessee, Univ. of California Berkeley,
*     Univ. of Colorado Denver
*     May 1 2012
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDT, LDV, LWORK, N, ND,
     $                   NS, NW
      LOGICAL            WANTT, WANTZ
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * ), DESCZ( * )
      DOUBLE PRECISION   A( * ), SI( KBOT ), SR( KBOT ), T( LDT, * ),
     $                   V( LDV, * ), WORK( * ), WI( * ), WR( * ),
     $                   Z( * )
*     ..
*
*  Purpose
*  =======
*
*  Aggressive early deflation:
*
*  PDLAQR2 accepts as input an upper Hessenberg matrix A and performs an
*  orthogonal similarity transformation designed to detect and deflate
*  fully converged eigenvalues from a trailing principal submatrix.  On
*  output A has been overwritten by a new Hessenberg matrix that is a
*  perturbation of an orthogonal similarity transformation of A.  It is
*  to be hoped that the final version of H has many zero subdiagonal
*  entries.
*
*  This routine handles small deflation windows which are affordable
*  by one processor. Normally, it is called by PDLAQR1. All the inputs
*  are assumed to be valid without checking.
*
*  Outline of the implementation
*  =============================
*
*  1. The NW-by-NW window A(KBOT-NW+1:KBOT,KBOT-NW+1:KBOT) is copied
*     with PDGEMR2D into the local array T on the process that owns
*     the (1,1) element of the window.
*  2. That process calls the serial routine DLAQR3 on T, accumulating
*     the transformation in V, and then broadcasts V, ND and the
*     trailing ND entries of WR and WI to all processes.
*  3. If ND > 0, T is copied back into A and the parts of A (and of Z
*     if WANTZ) outside the window are multiplied by V.  Three update
*     strategies are used, depending on how many row/column blocks of
*     size DESCA( MB_ ) the window spans (one, two, or more).
*     If ND = 0, neither A nor Z is modified.
*  4. The ND converged eigenvalues are stored in SR and SI on
*     process 0; zeros are stored on all other processes.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  Arguments
*  =========
*
*  WANTT   (global input) LOGICAL
*          If .TRUE., then the Hessenberg matrix H is fully updated
*          so that the quasi-triangular Schur factor may be
*          computed (in cooperation with the calling subroutine).
*          If .FALSE., then only enough of H is updated to preserve
*          the eigenvalues.
*
*  WANTZ   (global input) LOGICAL
*          If .TRUE., then the orthogonal matrix Z is updated
*          so that the orthogonal Schur factor may be computed
*          (in cooperation with the calling subroutine).
*          If .FALSE., then Z is not referenced.
*
*  N       (global input) INTEGER
*          The order of the matrix H and (if WANTZ is .TRUE.) the
*          order of the orthogonal matrix Z.
*          If N = 0 the routine returns immediately.
*
*  KTOP    (global input) INTEGER
*  KBOT    (global input) INTEGER
*          It is assumed without a check that either
*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
*          determine an isolated block along the diagonal of the
*          Hessenberg matrix. However, H(KTOP,KTOP-1)=0 is not
*          essentially necessary if WANTT is .TRUE. .
*
*  NW      (global input) INTEGER
*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
*          Normally NW .GE. 3 if PDLAQR2 is called by PDLAQR1.
*
*  A       (local input/output) DOUBLE PRECISION array, dimension
*          (DESCA(LLD_),*)
*          On input the initial N-by-N section of A stores the
*          Hessenberg matrix undergoing aggressive early deflation.
*          On output A has been transformed by an orthogonal
*          similarity transformation, perturbed, and then returned
*          to Hessenberg form that (it is to be hoped) has some
*          zero subdiagonal entries.
*
*  DESCA   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix A.
*
*  ILOZ    (global input) INTEGER
*  IHIZ    (global input) INTEGER
*          Specify the rows of Z to which transformations must be
*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
*
*  Z       (input/output) DOUBLE PRECISION array, dimension
*          (DESCZ(LLD_),*)
*          IF WANTZ is .TRUE., then on output, the orthogonal
*          similarity transformation mentioned above has been
*          accumulated into Z(ILOZ:IHIZ,KBOT-NW+1:KBOT) from the
*          right.
*          If WANTZ is .FALSE., then Z is unreferenced.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  NS      (global output) INTEGER
*          The number of unconverged (ie approximate) eigenvalues
*          returned in SR and SI that may be used as shifts by the
*          calling subroutine.
*          NOTE(review): NS is set by DLAQR3, which is called only on
*          the process that owns the window, and it is not broadcast
*          by this routine.  Confirm that callers do not rely on NS
*          on the other processes.
*
*  ND      (global output) INTEGER
*          The number of converged eigenvalues uncovered by this
*          subroutine.  ND is broadcast to all processes.
*
*  SR      (global output) DOUBLE PRECISION array, dimension KBOT
*  SI      (global output) DOUBLE PRECISION array, dimension KBOT
*          On output, the real and imaginary parts of approximate
*          eigenvalues that may be used for shifts are stored in
*          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
*          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
*          On proc #0, the real and imaginary parts of converged
*          eigenvalues are stored in SR(KBOT-ND+1) through SR(KBOT) and
*          SI(KBOT-ND+1) through SI(KBOT), respectively. On other
*          processors, these entries are set to zero.
*          NOTE(review): the code below writes only the converged
*          entries SR/SI(KBOT-ND+1:KBOT); the entries described above
*          for the unconverged shifts are not written here.
*
*  T       (local workspace) DOUBLE PRECISION array, dimension LDT*NW.
*
*  LDT     (local input) INTEGER
*          The leading dimension of the array T.
*          LDT >= NW.
*
*  V       (local workspace) DOUBLE PRECISION array, dimension LDV*NW.
*          On exit every process holds the NW-by-NW orthogonal
*          matrix computed by DLAQR3 (it is broadcast).
*
*  LDV     (local input) INTEGER
*          The leading dimension of the array V.
*          LDV >= NW.
*
*  WR      (local workspace) DOUBLE PRECISION array, dimension KBOT.
*  WI      (local workspace) DOUBLE PRECISION array, dimension KBOT.
*          Filled by DLAQR3 on the process that owns the window.  If
*          ND > 0 the trailing ND entries (indices NW-ND+1:NW) are
*          broadcast to all processes.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension LWORK.
*
*  LWORK   (local input) INTEGER
*          WORK(LWORK) is a local array and LWORK is assumed big enough
*          so that LWORK >= NW*NW.
*          NOTE(review): the first 3*NW*NW entries of WORK are passed
*          to DLAQR3 as its three NW-by-NW work arrays and the
*          remaining LWORK-3*NW*NW entries as its scalar workspace,
*          so in practice LWORK must exceed 3*NW*NW by whatever
*          DLAQR3 requires.
*
*  ================================================================
*  Implemented by
*        Meiyue Shao, Department of Computing Science and HPC2N,
*        Umea University, Sweden
*
*  ================================================================
*  References:
*        B. Kagstrom, D. Kressner, and M. Shao,
*        On Aggressive Early Deflation in Parallel Variants of the QR
*        Algorithm.
*        Para 2010, to appear.
*
*  ================================================================
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      DOUBLE PRECISION   ZERO, ONE
      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
*     ..
*     .. Local Scalars ..
      INTEGER            CONTXT, HBL, I, ICOL, ICOL1, INFO, II, IROW,
     $                   IROW1, ITMP1, ITMP2, JAFIRST, JJ, L, LDA, LDZ,
     $                   LLDTMP, MYCOL, MYROW, NODE, NPCOL, NPROW, DBLK,
     $                   HSTEP, VSTEP, KKROW, KKCOL, KLN, LTOP, LEFT,
     $                   RIGHT, UP, DOWN, D1, D2
*     ..
*     .. Local Arrays ..
      INTEGER            DESCT( 9 ), DESCV( 9 ), DESCWH( 9 ),
     $                   DESCWV( 9 )
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG2L, DLASET,
     $                   DLAQR3, DESCINIT, PDGEMM, PDGEMR2D, DGEMM,
     $                   DLAMOV, DGESD2D, DGERV2D, DGEBS2D, DGEBR2D,
     $                   IGEBS2D, IGEBR2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*
      INFO = 0
*
*     Quick return for an empty matrix.
*
      IF( N.EQ.0 )
     $   RETURN
*
*     Cache descriptor entries and the grid position of this process.
*     JAFIRST is the process column that owns the first column of A.
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      JAFIRST = DESCA( CSRC_ )
      LDZ = DESCZ( LLD_ )
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
      NODE = MYROW*NPCOL + MYCOL
*
*     Cyclic neighbours in the process grid.  Note that LEFT = RIGHT
*     = MYCOL when NPCOL = 1 and UP = DOWN = MYROW when NPROW = 1.
*
      LEFT = MOD( MYCOL+NPCOL-1, NPCOL )
      RIGHT = MOD( MYCOL+1, NPCOL )
      UP = MOD( MYROW+NPROW-1, NPROW )
      DOWN = MOD( MYROW+1, NPROW )
*
*     I is the last row/column of the active block.  LTOP is the first
*     row of A to which the column transformation is applied: row 1
*     when the full Schur form is wanted, row KTOP otherwise.
*
      I = KBOT
      L = KTOP
      IF( WANTT ) THEN
         LTOP = 1
      ELSE
         LTOP = L
      END IF
*
*     Begin Aggressive Early Deflation.
*
*     (II,JJ) is the process that owns the (1,1) element of the
*     deflation window.  T and V are described as DBLK-by-DBLK
*     matrices stored entirely on that process; processes outside
*     process row II use a dummy local leading dimension of 1.
*
      DBLK = NW
      CALL INFOG2L( I-DBLK+1, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
     $     MYCOL, IROW, ICOL, II, JJ )
      IF ( MYROW .EQ. II ) THEN
         CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        LDT, INFO )
         CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        LDV, INFO )
      ELSE
         CALL DESCINIT( DESCT, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        1, INFO )
         CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, II, JJ, CONTXT,
     $        1, INFO )
      END IF
*
*     Gather the window into T on process (II,JJ).
*
      CALL PDGEMR2D( DBLK, DBLK, A, I-DBLK+1, I-DBLK+1, DESCA, T, 1, 1,
     $     DESCT, CONTXT )
      IF ( MYROW .EQ. II .AND. MYCOL .EQ. JJ ) THEN
*
*        Serial deflation on the owner.  WORK is split into three
*        DBLK-by-DBLK arrays followed by the scalar workspace.
*
         CALL DLASET( 'All', DBLK, DBLK, ZERO, ONE, V, LDV )
         CALL DLAQR3( .TRUE., .TRUE., DBLK, 1, DBLK, DBLK-1, T, LDT, 1,
     $        DBLK, V, LDV, NS, ND, WR, WI, WORK, DBLK, DBLK,
     $        WORK( DBLK*DBLK+1 ), DBLK, DBLK, WORK( 2*DBLK*DBLK+1 ),
     $        DBLK, WORK( 3*DBLK*DBLK+1 ), LWORK-3*DBLK*DBLK )
         CALL DGEBS2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV )
         CALL IGEBS2D( CONTXT, 'All', ' ', 1, 1, ND, 1 )
*
*        The converged eigenvalues are read on every process when
*        they are extracted at the end of this routine, so they must
*        be made available everywhere.
*
         IF( ND .GT. 0 ) THEN
            CALL DGEBS2D( CONTXT, 'All', ' ', ND, 1, WR( DBLK-ND+1 ),
     $           ND )
            CALL DGEBS2D( CONTXT, 'All', ' ', ND, 1, WI( DBLK-ND+1 ),
     $           ND )
         END IF
      ELSE
         CALL DGEBR2D( CONTXT, 'All', ' ', DBLK, DBLK, V, LDV, II, JJ )
         CALL IGEBR2D( CONTXT, 'All', ' ', 1, 1, ND, 1, II, JJ )
         IF( ND .GT. 0 ) THEN
            CALL DGEBR2D( CONTXT, 'All', ' ', ND, 1, WR( DBLK-ND+1 ),
     $           ND, II, JJ )
            CALL DGEBR2D( CONTXT, 'All', ' ', ND, 1, WI( DBLK-ND+1 ),
     $           ND, II, JJ )
         END IF
      END IF
*
      IF( ND .GT. 0 ) THEN
*
*        Copy the local matrix back to the diagonal block.
*
         CALL PDGEMR2D( DBLK, DBLK, T, 1, 1, DESCT, A, I-DBLK+1,
     $        I-DBLK+1, DESCA, CONTXT )
*
*        Update T and Z.
*
         IF( MOD( I-DBLK, HBL )+DBLK .LE. HBL ) THEN
*
*           Simplest case: the deflation window is located on one
*           processor.
*           Call DGEMM directly to perform the update.
*
*           HSTEP/VSTEP bound the slab width so that a DBLK-by-HSTEP
*           (or VSTEP-by-DBLK) product fits in WORK.
*
            HSTEP = LWORK / DBLK
            VSTEP = HSTEP
*
*           Update horizontal slab in A:
*           A(window rows, I+1:N) := V**T * A(window rows, I+1:N).
*
            IF( WANTT ) THEN
               CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYROW .EQ. II ) THEN
                  ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                  DO 10 KKCOL = ICOL, ICOL1, HSTEP
                     KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                     CALL DGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
     $                    LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO, WORK,
     $                    DBLK )
                     CALL DLAMOV( 'A', DBLK, KLN, WORK, DBLK,
     $                    A( IROW+(KKCOL-1)*LDA ), LDA )
   10             CONTINUE
               END IF
            END IF
*
*           Update vertical slab in A:
*           A(LTOP:I-DBLK, window cols) := A(...) * V.
*
            CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
     $           MYCOL, IROW, ICOL, II, JJ )
            IF( MYCOL .EQ. JJ ) THEN
*
*              IROW1 is the last local row at or above global row
*              I-DBLK; step back by one when this process does not
*              own that row.
*
               CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $              MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
               IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
               DO 20 KKROW = IROW, IROW1, VSTEP
                  KLN = MIN( VSTEP, IROW1-KKROW+1 )
                  CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                 A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO, WORK,
     $                 KLN )
                  CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                 A( KKROW+(ICOL-1)*LDA ), LDA )
   20          CONTINUE
            END IF
*
*           Update vertical slab in Z:
*           Z(ILOZ:IHIZ, window cols) := Z(...) * V.
*
            IF( WANTZ ) THEN
               CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYCOL .EQ. JJ ) THEN
                  CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 30 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
     $                    WORK, KLN )
                     CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                    Z( KKROW+(ICOL-1)*LDZ ), LDZ )
   30             CONTINUE
               END IF
            END IF
*
         ELSE IF( MOD( I-DBLK, HBL )+DBLK .LE. 2*HBL ) THEN
*
*           More complicated case: the deflation window lay on a 2x2
*           processor mesh.
*           Call DGEMM locally and communicate by pair.
*
*           D1 is the number of window rows/columns in the first of
*           the two blocks, D2 the number in the second.
*
            D1 = HBL - MOD( I-DBLK, HBL )
            D2 = DBLK - D1
            HSTEP = LWORK / DBLK
            VSTEP = HSTEP
*
*           Update horizontal slab in A.
*
            IF( WANTT ) THEN
               CALL INFOG2L( I-DBLK+1, I+1, DESCA, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYROW .EQ. UP ) THEN
*
*                 Only one process row: both row blocks of the window
*                 are local, so the whole product is formed at once.
*                 V is stored with leading dimension LDV.
*
                  IF( MYROW .EQ. II ) THEN
                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                     DO 40 KKCOL = ICOL, ICOL1, HSTEP
                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                        CALL DGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V,
     $                       LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ZERO,
     $                       WORK, DBLK )
                        CALL DLAMOV( 'A', DBLK, KLN, WORK, DBLK,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
   40                CONTINUE
                  END IF
               ELSE
                  IF( MYROW .EQ. II ) THEN
*
*                    This process row holds the first D1 window rows.
*                    Form its contribution to the last D2 rows, send
*                    it DOWN, receive the partner's contribution to
*                    the first D1 rows, then add the local part.
*
                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                     DO 50 KKCOL = ICOL, ICOL1, HSTEP
                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                        CALL DGEMM( 'T', 'N', D2, KLN, D1, ONE,
     $                       V( 1, D1+1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
     $                       LDA, ZERO, WORK( D1+1 ), DBLK )
                        CALL DGESD2D( CONTXT, D2, KLN, WORK( D1+1 ),
     $                       DBLK, DOWN, MYCOL )
                        CALL DGERV2D( CONTXT, D1, KLN, WORK, DBLK, DOWN,
     $                       MYCOL )
                        CALL DGEMM( 'T', 'N', D1, KLN, D1, ONE,
     $                       V, LDV, A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
     $                       WORK, DBLK )
                        CALL DLAMOV( 'A', D1, KLN, WORK, DBLK,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
   50                CONTINUE
                  ELSE IF( UP .EQ. II ) THEN
*
*                    This process row holds the last D2 window rows;
*                    mirror image of the exchange above with UP.
*
                     ICOL1 = NUMROC( N, HBL, MYCOL, JAFIRST, NPCOL )
                     DO 60 KKCOL = ICOL, ICOL1, HSTEP
                        KLN = MIN( HSTEP, ICOL1-KKCOL+1 )
                        CALL DGEMM( 'T', 'N', D1, KLN, D2, ONE,
     $                       V( D1+1, 1 ), LDV, A( IROW+(KKCOL-1)*LDA ),
     $                       LDA, ZERO, WORK, DBLK )
                        CALL DGESD2D( CONTXT, D1, KLN, WORK, DBLK, UP,
     $                       MYCOL )
                        CALL DGERV2D( CONTXT, D2, KLN, WORK( D1+1 ),
     $                       DBLK, UP, MYCOL )
                        CALL DGEMM( 'T', 'N', D2, KLN, D2, ONE,
     $                       V( D1+1, D1+1 ), LDV,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA, ONE,
     $                       WORK( D1+1 ), DBLK )
                        CALL DLAMOV( 'A', D2, KLN, WORK( D1+1 ), DBLK,
     $                       A( IROW+(KKCOL-1)*LDA ), LDA )
   60                CONTINUE
                  END IF
               END IF
            END IF
*
*           Update vertical slab in A.
*
            CALL INFOG2L( LTOP, I-DBLK+1, DESCA, NPROW, NPCOL, MYROW,
     $           MYCOL, IROW, ICOL, II, JJ )
            IF( MYCOL .EQ. LEFT ) THEN
*
*              Only one process column: both column blocks of the
*              window are local.
*
               IF( MYCOL .EQ. JJ ) THEN
                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 70 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ZERO,
     $                    WORK, KLN )
                     CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
   70             CONTINUE
               END IF
            ELSE
               IF( MYCOL .EQ. JJ ) THEN
*
*                 This process column holds the first D1 window
*                 columns; exchange partial products with RIGHT.
*
                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 80 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL DGEMM( 'N', 'N', KLN, D2, D1, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA,
     $                    V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
     $                    KLN )
                     CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                    KLN, MYROW, RIGHT )
                     CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                    RIGHT )
                     CALL DGEMM( 'N', 'N', KLN, D1, D1, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V, LDV, ONE,
     $                    WORK, KLN )
                     CALL DLAMOV( 'A', KLN, D1, WORK, KLN,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
   80             CONTINUE
               ELSE IF ( LEFT .EQ. JJ ) THEN
*
*                 This process column holds the last D2 window
*                 columns; exchange partial products with LEFT.
*
                  CALL INFOG2L( I-DBLK, I-DBLK+1, DESCA, NPROW, NPCOL,
     $                 MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                  IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                  DO 90 KKROW = IROW, IROW1, VSTEP
                     KLN = MIN( VSTEP, IROW1-KKROW+1 )
                     CALL DGEMM( 'N', 'N', KLN, D1, D2, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, 1 ),
     $                    LDV, ZERO, WORK, KLN )
                     CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                    LEFT )
                     CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                    KLN, MYROW, LEFT )
                     CALL DGEMM( 'N', 'N', KLN, D2, D2, ONE,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA, V( D1+1, D1+1 ),
     $                    LDV, ONE, WORK( 1+D1*KLN ), KLN )
                     CALL DLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ), KLN,
     $                    A( KKROW+(ICOL-1)*LDA ), LDA )
   90             CONTINUE
               END IF
            END IF
*
*           Update vertical slab in Z (same scheme as for A above).
*
            IF( WANTZ ) THEN
               CALL INFOG2L( ILOZ, I-DBLK+1, DESCZ, NPROW, NPCOL, MYROW,
     $              MYCOL, IROW, ICOL, II, JJ )
               IF( MYCOL .EQ. LEFT ) THEN
                  IF( MYCOL .EQ. JJ ) THEN
                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                     DO 100 KKROW = IROW, IROW1, VSTEP
                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
                        CALL DGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ZERO,
     $                       WORK, KLN )
                        CALL DLAMOV( 'A', KLN, DBLK, WORK, KLN,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
  100                CONTINUE
                  END IF
               ELSE
                  IF( MYCOL .EQ. JJ ) THEN
                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                     DO 110 KKROW = IROW, IROW1, VSTEP
                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
                        CALL DGEMM( 'N', 'N', KLN, D2, D1, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
     $                       V( 1, D1+1 ), LDV, ZERO, WORK( 1+D1*KLN ),
     $                       KLN )
                        CALL DGESD2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                       KLN, MYROW, RIGHT )
                        CALL DGERV2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                       RIGHT )
                        CALL DGEMM( 'N', 'N', KLN, D1, D1, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ, V, LDV, ONE,
     $                       WORK, KLN )
                        CALL DLAMOV( 'A', KLN, D1, WORK, KLN,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ )
  110                CONTINUE
                  ELSE IF( LEFT .EQ. JJ ) THEN
                     CALL INFOG2L( IHIZ, I-DBLK+1, DESCZ, NPROW, NPCOL,
     $                    MYROW, MYCOL, IROW1, ICOL1, ITMP1, ITMP2 )
                     IF( MYROW .NE. ITMP1 ) IROW1 = IROW1-1
                     DO 120 KKROW = IROW, IROW1, VSTEP
                        KLN = MIN( VSTEP, IROW1-KKROW+1 )
                        CALL DGEMM( 'N', 'N', KLN, D1, D2, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
     $                       V( D1+1, 1 ), LDV, ZERO, WORK, KLN )
                        CALL DGESD2D( CONTXT, KLN, D1, WORK, KLN, MYROW,
     $                       LEFT )
                        CALL DGERV2D( CONTXT, KLN, D2, WORK( 1+D1*KLN ),
     $                       KLN, MYROW, LEFT )
                        CALL DGEMM( 'N', 'N', KLN, D2, D2, ONE,
     $                       Z( KKROW+(ICOL-1)*LDZ ), LDZ,
     $                       V( D1+1, D1+1 ), LDV, ONE,
     $                       WORK( 1+D1*KLN ), KLN )
                        CALL DLAMOV( 'A', KLN, D2, WORK( 1+D1*KLN ),
     $                       KLN, Z( KKROW+(ICOL-1)*LDZ ), LDZ )
  120                CONTINUE
                  END IF
               END IF
            END IF
*
         ELSE
*
*           Most complicated case: the deflation window lay across the
*           border of the processor mesh.
*           Treat V as a distributed matrix and call PDGEMM.
*
            HSTEP = LWORK / DBLK * NPCOL
            VSTEP = LWORK / DBLK * NPROW
            LLDTMP = NUMROC( DBLK, DBLK, MYROW, 0, NPROW )
            LLDTMP = MAX( 1, LLDTMP )
*
*           V is redescribed as a single DBLK-by-DBLK block owned by
*           process (0,0).  Every process holds a copy of V with
*           leading dimension LDV, so LDV is the local leading
*           dimension on process row 0; other rows hold no part of
*           the block and use 1.
*
            IF( MYROW .EQ. 0 ) THEN
               CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, 0, 0,
     $              CONTXT, LDV, INFO )
            ELSE
               CALL DESCINIT( DESCV, DBLK, DBLK, DBLK, DBLK, 0, 0,
     $              CONTXT, 1, INFO )
            END IF
            CALL DESCINIT( DESCWH, DBLK, HSTEP, DBLK, LWORK / DBLK, 0,
     $           0, CONTXT, LLDTMP, INFO )
*
*           Update horizontal slab in A.
*
            IF( WANTT ) THEN
               DO 130 KKCOL = I+1, N, HSTEP
                  KLN = MIN( HSTEP, N-KKCOL+1 )
                  CALL PDGEMM( 'T', 'N', DBLK, KLN, DBLK, ONE, V, 1, 1,
     $                 DESCV, A, I-DBLK+1, KKCOL, DESCA, ZERO, WORK, 1,
     $                 1, DESCWH )
                  CALL PDGEMR2D( DBLK, KLN, WORK, 1, 1, DESCWH, A,
     $                 I-DBLK+1, KKCOL, DESCA, CONTXT )
  130          CONTINUE
            END IF
*
*           Update vertical slab in A.
*
            DO 140 KKROW = LTOP, I-DBLK, VSTEP
               KLN = MIN( VSTEP, I-DBLK-KKROW+1 )
               LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
               LLDTMP = MAX( 1, LLDTMP )
               CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK, 0,
     $              0, CONTXT, LLDTMP, INFO )
               CALL PDGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, A, KKROW,
     $              I-DBLK+1, DESCA, V, 1, 1, DESCV, ZERO, WORK, 1, 1,
     $              DESCWV )
               CALL PDGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, A, KKROW,
     $              I-DBLK+1, DESCA, CONTXT )
  140       CONTINUE
*
*           Update vertical slab in Z.
*
            IF( WANTZ ) THEN
               DO 150 KKROW = ILOZ, IHIZ, VSTEP
                  KLN = MIN( VSTEP, IHIZ-KKROW+1 )
                  LLDTMP = NUMROC( KLN, LWORK / DBLK, MYROW, 0, NPROW )
                  LLDTMP = MAX( 1, LLDTMP )
                  CALL DESCINIT( DESCWV, KLN, DBLK, LWORK / DBLK, DBLK,
     $                 0, 0, CONTXT, LLDTMP, INFO )
                  CALL PDGEMM( 'N', 'N', KLN, DBLK, DBLK, ONE, Z, KKROW,
     $                 I-DBLK+1, DESCZ, V, 1, 1, DESCV, ZERO, WORK, 1,
     $                 1, DESCWV )
                  CALL PDGEMR2D( KLN, DBLK, WORK, 1, 1, DESCWV, Z,
     $                 KKROW, I-DBLK+1, DESCZ, CONTXT )
  150          CONTINUE
            END IF
         END IF
*
*        Extract converged eigenvalues.
*
*        II counts the eigenvalues handled so far, walking upwards
*        from position I (window position DBLK).  A zero imaginary
*        part, or only one eigenvalue left, gives a single real
*        entry; otherwise two entries are stored as a pair.  Only
*        process 0 stores the values; all others store zeros.
*
         II = 0
  160    CONTINUE
            IF( II .EQ. ND-1 .OR. WI( DBLK-II ) .EQ. ZERO ) THEN
               IF( NODE .EQ. 0 ) THEN
                  SR( I-II ) = WR( DBLK-II )
               ELSE
                  SR( I-II ) = ZERO
               END IF
               SI( I-II ) = ZERO
               II = II + 1
            ELSE
               IF( NODE .EQ. 0 ) THEN
                  SR( I-II-1 ) = WR( DBLK-II-1 )
                  SR( I-II ) = WR( DBLK-II )
                  SI( I-II-1 ) = WI( DBLK-II-1 )
                  SI( I-II ) = WI( DBLK-II )
               ELSE
                  SR( I-II-1 ) = ZERO
                  SR( I-II ) = ZERO
                  SI( I-II-1 ) = ZERO
                  SI( I-II ) = ZERO
               END IF
               II = II + 2
            END IF
         IF( II .LT. ND ) GOTO 160
      END IF
*
*     END OF PDLAQR2
*
      END

descinit
subroutine descinit(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld, info)
Definition descinit.f:3

infog2l
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3

max
#define max(A, B)
Definition pcgemr.c:180

min
#define min(A, B)
Definition pcgemr.c:181

pdlaqr2
subroutine pdlaqr2(wantt, wantz, n, ktop, kbot, nw, a, desca, iloz, ihiz, z, descz, ns, nd, sr, si, t, ldt, v, ldv, wr, wi, work, lwork)
Definition pdlaqr2.f:4