d4/d69/slarrv2_8f_source.html

      SUBROUTINE slarrv2( N, VL, VU, D, L, PIVMIN,

     $                   ISPLIT, M, DOL, DOU, NEEDIL, NEEDIU,

     $                   MINRGP, RTOL1, RTOL2, W, WERR, WGAP,

     $                   IBLOCK, INDEXW, GERS, SDIAM,

     $                   Z, LDZ, ISUPPZ,

     $                   WORK, IWORK, VSTART, FINISH,

     $                   MAXCLS, NDEPTH, PARITY, ZOFFSET, INFO )


*  -- ScaLAPACK auxiliary routine (version 2.0) --

*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver

*     July 4, 2010

*

      IMPLICIT NONE

*

*     .. Scalar Arguments ..

      INTEGER            DOL, DOU, INFO, LDZ, M, N, MAXCLS,

     $                   NDEPTH, NEEDIL, NEEDIU, PARITY, ZOFFSET

      REAL               MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU

      LOGICAL VSTART, FINISH

*     ..

*     .. Array Arguments ..

      INTEGER            IBLOCK( * ), INDEXW( * ), ISPLIT( * ),

     $                   ISUPPZ( * ), IWORK( * )

      REAL               D( * ), GERS( * ), L( * ), SDIAM( * ),

     $                   w( * ), werr( * ),

     $                   wgap( * ), work( * )

      REAL              Z( LDZ, * )

*

*  Purpose

*  =======

*

*  SLARRV2 computes the eigenvectors of the tridiagonal matrix

*  T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T.

*  The input eigenvalues should have been computed by SLARRE2A

*  or by previous calls to SLARRV2.

*

*  The major difference between the parallel and the sequential construction

*  of the representation tree is that in the parallel case, not all eigenvalues

*  of a given cluster might be computed locally. Other processors might "own"

*  and refine part of an eigenvalue cluster. This is crucial for scalability.

*  Thus there might be communication necessary before the current level of the

*  representation tree can be parsed.

*

*  Please note:

*  1. The calling sequence has two additional INTEGER parameters,

*     DOL and DOU, that should satisfy M>=DOU>=DOL>=1.

*     These parameters are only relevant for the case JOBZ = 'V'.

*     SLARRV2  ONLY computes the eigenVECTORS

*     corresponding to eigenvalues DOL through DOU in W. (That is,

*     instead of computing the eigenvectors belonging to W(1)

*     through W(M), only the eigenvectors belonging to eigenvalues

*     W(DOL) through W(DOU) are computed.) In this case, only the

*     eigenvalues DOL:DOU are guaranteed to be accurately refined

*     to all figures by Rayleigh-Quotient iteration.

*

*  2. The additional arguments VSTART, FINISH, NDEPTH, PARITY, ZOFFSET

*     are included as a thread-safe implementation equivalent to SAVE variables.

*     These variables store details about the local representation tree which is

*     computed layerwise. For scalability reasons, eigenvalues belonging to the

*     locally relevant representation tree might be computed on other processors.

*     These need to be communicated before the inspection of the RRRs can proceed

*     on any given layer.

*     Note that only when the variable FINISH is true, the computation has ended.

*     All eigenpairs between DOL and DOU have been computed. M is set = DOU - DOL + 1.

*

*  3. SLARRV2 needs more workspace in Z than the sequential SLARRV.

*     It is used to store the conformal embedding of the local representation tree.

*

*  Arguments

*  =========

*

*  N       (input) INTEGER

*          The order of the matrix.  N >= 0.

*

*  VL      (input) REAL

*  VU      (input) REAL

*          Lower and upper bounds of the interval that contains the desired

*          eigenvalues. VL < VU. Needed to compute gaps on the left or right

*          end of the extremal eigenvalues in the desired RANGE.

*          VU is currently not used but kept as parameter in case needed.

*

*  D       (input/output) REAL             array, dimension (N)

*          On entry, the N diagonal elements of the diagonal matrix D.

*          On exit, D is overwritten.

*

*  L       (input/output) REAL             array, dimension (N)

*          On entry, the (N-1) subdiagonal elements of the unit

*          bidiagonal matrix L are in elements 1 to N-1 of L

*          (if the matrix is not split). At the end of each block

*          is stored the corresponding shift as given by SLARRE.

*          On exit, L is overwritten.

*

*  PIVMIN  (input) REAL

*          The minimum pivot allowed in the Sturm sequence.

*

*  ISPLIT  (input) INTEGER array, dimension (N)

*          The splitting points, at which T breaks up into blocks.

*          The first block consists of rows/columns 1 to

*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1

*          through ISPLIT( 2 ), etc.

*

*  M       (input) INTEGER

*          The total number of input eigenvalues.  0 <= M <= N.

*

*  DOL     (input) INTEGER

*  DOU     (input) INTEGER

*          If the user wants to compute only selected eigenvectors from all

*          the eigenvalues supplied, he can specify an index range DOL:DOU.

*          Or else the setting DOL=1, DOU=M should be applied.

*          Note that DOL and DOU refer to the order in which the eigenvalues

*          are stored in W.

*          If the user wants to compute only selected eigenpairs, then

*          the columns DOL-1 to DOU+1 of the eigenvector space Z contain the

*          computed eigenvectors. All other columns of Z are set to zero.

*          If DOL > 1, then Z(:,DOL-1-ZOFFSET) is used.

*          If DOU < M, then Z(:,DOU+1-ZOFFSET) is used.

*

*

*  NEEDIL  (input/output) INTEGER

*  NEEDIU  (input/output) INTEGER

*          Describe which are the left and right outermost eigenvalues

*          that still need to be included in the computation. These indices

*          indicate whether eigenvalues from other processors are needed to

*          correctly compute the conformally embedded representation tree.

*          When DOL<=NEEDIL<=NEEDIU<=DOU, all required eigenvalues are local

*          to the processor and no communication is required to compute its

*          part of the representation tree.

*

*  MINRGP  (input) REAL

*          The minimum relative gap threshold to decide whether an eigenvalue

*          or a cluster boundary is reached.

*

*  RTOL1   (input) REAL

*  RTOL2   (input) REAL

*           Parameters for bisection.

*           An interval [LEFT,RIGHT] has converged if

*           RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )

*

*  W       (input/output) REAL             array, dimension (N)

*          The first M elements of W contain the APPROXIMATE eigenvalues for

*          which eigenvectors are to be computed. The eigenvalues

*          should be grouped by split-off block and ordered from

*          smallest to largest within the block. (The output array

*          W from SSTEGR2A is expected here.) Furthermore, they are with

*          respect to the shift of the corresponding root representation

*          for their block. On exit,

*          W holds those UNshifted eigenvalues

*          for which eigenvectors have already been computed.

*

*  WERR    (input/output) REAL             array, dimension (N)

*          The first M elements contain the semiwidth of the uncertainty

*          interval of the corresponding eigenvalue in W

*

*  WGAP    (input/output) REAL             array, dimension (N)

*          The separation from the right neighbor eigenvalue in W.

*

*  IBLOCK  (input) INTEGER array, dimension (N)

*          The indices of the blocks (submatrices) associated with the

*          corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue

*          W(i) belongs to the first block from the top, =2 if W(i)

*          belongs to the second block, etc.

*

*  INDEXW  (input) INTEGER array, dimension (N)

*          The indices of the eigenvalues within each block (submatrix);

*          for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the

*          i-th eigenvalue W(i) is the 10-th eigenvalue in the second block.

*

*  GERS    (input) REAL             array, dimension (2*N)

*          The N Gerschgorin intervals (the i-th Gerschgorin interval

*          is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should

*          be computed from the original UNshifted matrix.

*          Currently NOT used but kept as parameter in case it becomes

*          needed in the future.

*

*  SDIAM   (input) REAL             array, dimension (N)

*          The spectral diameters for all unreduced blocks.

*

*  Z       (output) REAL             array, dimension (LDZ, max(1,M) )

*          If INFO = 0, the first M columns of Z contain the

*          orthonormal eigenvectors of the matrix T

*          corresponding to the input eigenvalues, with the i-th

*          column of Z holding the eigenvector associated with W(i).

*          In the distributed version, only a subset of columns

*          is accessed, see DOL,DOU and ZOFFSET.

*

*  LDZ     (input) INTEGER

*          The leading dimension of the array Z.  LDZ >= 1, and if

*          JOBZ = 'V', LDZ >= max(1,N).

*

*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )

*          The support of the eigenvectors in Z, i.e., the indices

*          indicating the nonzero elements in Z. The I-th eigenvector

*          is nonzero only in elements ISUPPZ( 2*I-1 ) through

*          ISUPPZ( 2*I ).

*

*  WORK    (workspace) REAL             array, dimension (12*N)

*

*  IWORK   (workspace) INTEGER array, dimension (7*N)

*

*  VSTART  (input/output) LOGICAL

*          .TRUE. on initialization, set to .FALSE. afterwards.

*

*  FINISH  (input/output) LOGICAL

*          A flag that indicates whether all eigenpairs have been computed.

*

*  MAXCLS  (input/output) INTEGER

*          The largest cluster worked on by this processor in the

*          representation tree.

*

*  NDEPTH  (input/output) INTEGER

*          The current depth of the representation tree. Set to

*          zero on initial pass, changed when the deeper levels of

*          the representation tree are generated.

*

*  PARITY  (input/output) INTEGER

*          An internal parameter needed for the storage of the

*          clusters on the current level of the representation tree.

*

*  ZOFFSET (input) INTEGER

*          Offset for storing the eigenpairs when Z is distributed

*          in 1D-cyclic fashion.

*

*  INFO    (output) INTEGER

*          = 0:  successful exit

*

*          > 0:  A problem occurred in SLARRV2.

*          < 0:  One of the called subroutines signaled an internal problem.

*                Needs inspection of the corresponding parameter IINFO

*                for further information.

*

*          =-1:  Problem in SLARRB2 when refining a child's eigenvalues.

*          =-2:  Problem in SLARRF2 when computing the RRR of a child.

*                When a child is inside a tight cluster, it can be difficult

*                to find an RRR. A partial remedy from the user's point of

*                view is to make the parameter MINRGP smaller and recompile.

*                However, as the orthogonality of the computed vectors is

*                proportional to 1/MINRGP, the user should be aware that

*                he might be trading in precision when he decreases MINRGP.

*          =-3:  Problem in SLARRB2 when refining a single eigenvalue

*                after the Rayleigh correction was rejected.

*          = 5:  The Rayleigh Quotient Iteration failed to converge to

*                full accuracy in MAXITR steps.

*

*  =====================================================================

*

*     .. Parameters ..

      INTEGER            MAXITR, USE30, USE31, USE32A, USE32B

      PARAMETER          ( MAXITR = 10, use30=30, use31=31,

     $                     use32a=3210, use32b = 3211 )

      REAL               ZERO, ONE, TWO, THREE, FOUR, HALF

      PARAMETER          ( ZERO = 0.0e0, one = 1.0e0,

     $                     two = 2.0e0, three = 3.0e0,

     $                     four = 4.0e0, half = 0.5e0)

*     ..

*     .. Local Arrays ..

      INTEGER            SPLACE( 4 )

*     ..

*     .. Local Scalars ..

      LOGICAL            DELREF, ESKIP, NEEDBS, ONLYLC, STP2II, TRYMID,

     $                   TRYRQC, USEDBS, USEDRQ

      INTEGER            I, IBEGIN, IEND, II, IINCLS, IINDC1, IINDC2,

     $                   iindwk, iinfo, im, in, indeig, indld, indlld,

     $                   indwrk, isupmn, isupmx, iter, itmp1, itwist, j,

     $                   jblk, k, kk, miniwsize, minwsize, mywfst,

     $                   mywlst, nclus, negcnt, newcls, newfst, newftt,

     $                   newlst, newsiz, offset, oldcls, oldfst, oldien,

     $                   oldlst, oldncl, p, q, vrtree, wbegin, wend,

     $                   windex, windmn, windpl, zfrom, zindex, zto,

     $                   zusedl, zusedu, zusedw

      REAL               AVGAP, BSTRES, BSTW, ENUFGP, EPS, FUDGE, GAP,

     $                   GAPTOL, LAMBDA, LEFT, LGAP, LGPVMN, LGSPDM,

     $                   LOG_IN, MGAP, MINGMA, MYERR, NRMINV, NXTERR,

     $                   ORTOL, RESID, RGAP, RIGHT, RLTL30, RQCORR,

     $                   RQTOL, SAVEGP, SGNDEF, SIGMA, SPDIAM, SSIGMA,

     $                   TAU, TMP, TOL, ZTZ

*     ..

*     .. External Functions ..

      REAL              SLAMCH

      REAL               SDOT, SNRM2

      EXTERNAL           SDOT, SLAMCH, SNRM2

*     ..

*     .. External Subroutines ..

      EXTERNAL           SAXPY, SCOPY, SLAR1VA, SLARRB2,

     $                   slarrf2, slaset, sscal

*     ..

*     .. Intrinsic Functions ..

      INTRINSIC abs, real, max, min, sqrt

*     ..

*     .. Executable Statements ..

*     ..


      info = 0

*     The first N entries of WORK are reserved for the eigenvalues

      indld = n+1

      indlld= 2*n+1

      indwrk= 3*n+1

      minwsize = 12 * n


*     IWORK(IINCLS+JBLK) holds the number of clusters on the current level

*     of the reptree for block JBLK

      iincls = 0

*     IWORK(IINDC1+1:IINDC2+N) are used to store the clusters of the current

*     layer and the one above.

      iindc1 = n

      iindc2 = 2*n

      iindwk = 3*n + 1

      miniwsize = 7 * n


      eps = slamch( 'Precision' )

      rqtol = two * eps


      tryrqc = .true.

*     Decide which representation tree criterion to use

*     USE30 = Lapack 3.0 criterion

*     USE31 = LAPACK 3.1 criterion

*     USE32A = two criteria, determines singletons with USE31, and groups with avgap.

*     USE32B = two criteria, determines singletons with USE31, and groups with USE30.

      vrtree = use32a

*

      lgpvmn = log( pivmin )


      IF(vstart) THEN

*

*        PREPROCESSING, DONE ONLY IN THE FIRST CALL

*

         vstart = .false.

*

         maxcls = 1


*        Set delayed eigenvalue refinement

*        In order to enable more parallelism, refinement

*        must be done immediately and cannot be delayed until

*        the next representation tree level.

         delref = .false.


         DO 1 i= 1,minwsize

            work( i ) = zero

 1       CONTINUE


         DO 2 i= 1,miniwsize

            iwork( i ) = 0

 2       CONTINUE


         zusedl = 1

         IF(dol.GT.1) THEN

*           Set lower bound for use of Z

            zusedl = dol-1

         ENDIF

         zusedu = m

         IF(dou.LT.m) THEN

*           Set upper bound for use of Z

            zusedu = dou+1

         ENDIF

*        The width of the part of Z that is used

         zusedw = zusedu - zusedl + 1

*

         CALL slaset( 'Full', n, zusedw, zero, zero,

     $                    z(1,(zusedl-zoffset)), ldz )


*        Initialize NDEPTH, the current depth of the representation tree

         ndepth = 0

*        Initialize parity

         parity = 1


*        Go through blocks, initialize data structures

         ibegin = 1

         wbegin = 1

         DO 10 jblk = 1, iblock( m )

            iend = isplit( jblk )

            sigma = l( iend )

            wend = wbegin - 1

 3          CONTINUE

            IF( wend.LT.m ) THEN

               IF( iblock( wend+1 ).EQ.jblk ) THEN

                  wend = wend + 1

                  GO TO 3

               END IF

            END IF

            IF( wend.LT.wbegin ) THEN

               iwork( iincls + jblk ) = 0

               ibegin = iend + 1

               GO TO 10

            ELSEIF( (wend.LT.dol).OR.(wbegin.GT.dou) ) THEN

               iwork( iincls + jblk ) = 0

               ibegin = iend + 1

               wbegin = wend + 1

               GO TO 10

            END IF

*           The number of eigenvalues in the current block

            im = wend - wbegin + 1

*           This is for a 1x1 block

            IF( ibegin.EQ.iend ) THEN

               iwork( iincls + jblk ) = 0

               z( ibegin, (wbegin-zoffset) ) = one

               isuppz( 2*wbegin-1 ) = ibegin

               isuppz( 2*wbegin ) = ibegin

               w( wbegin ) = w( wbegin ) + sigma

               work( wbegin ) = w( wbegin )

               ibegin = iend + 1

               wbegin = wbegin + 1

               GO TO 10

            END IF

            CALL scopy( im, w( wbegin ), 1,

     &                work( wbegin ), 1 )

*           We store in W the eigenvalue approximations w.r.t. the original

*           matrix T.

            DO 5 i=1,im

               w(wbegin+i-1) = w(wbegin+i-1)+sigma

 5          CONTINUE


*           Initialize cluster counter for this block

            iwork( iincls + jblk ) = 1

            iwork( iindc1+ibegin ) = 1

            iwork( iindc1+ibegin+1 ) = im

*

            ibegin = iend + 1

            wbegin = wend + 1

10       CONTINUE

*

      ENDIF


*     Init NEEDIL and NEEDIU

      needil = dou

      neediu = dol


*     Here starts the main loop

*     Only one pass through the loop is done until no collaboration

*     with other processors is needed.

 40   CONTINUE


      parity = 1 - parity


*     For each block, build next level of representation tree

*     if there are still remaining clusters

      ibegin = 1

      wbegin = 1

      DO 170 jblk = 1, iblock( m )

         iend = isplit( jblk )

         sigma = l( iend )

*        Find the eigenvectors of the submatrix indexed IBEGIN

*        through IEND.

         IF(m.EQ.n) THEN

*           all eigenpairs are computed

            wend = iend

         ELSE

*           count how many wanted eigenpairs are in this block

            wend = wbegin - 1

 15         CONTINUE

            IF( wend.LT.m ) THEN

               IF( iblock( wend+1 ).EQ.jblk ) THEN

                  wend = wend + 1

                  GO TO 15

               END IF

            END IF

         ENDIF


         oldncl = iwork( iincls + jblk )

         IF( oldncl.EQ.0 ) THEN

            ibegin = iend + 1

            wbegin = wend + 1

            GO TO 170

         END IF

*        OLDIEN is the last index of the previous block

         oldien = ibegin - 1

*        Calculate the size of the current block

         in = iend - ibegin + 1

*        The number of eigenvalues in the current block

         im = wend - wbegin + 1


*        Find local spectral diameter of the block

         spdiam = sdiam(jblk)

         lgspdm = log( spdiam + pivmin )

*        Compute ORTOL parameter, similar to SSTEIN

         ortol = spdiam*1.0e-3

*        Compute average gap

         avgap = spdiam/real(in-1)

*        Compute the minimum of average gap and ORTOL parameter

*        This can be used as a lower bound for acceptable separation

*        between eigenvalues

         enufgp = min(ortol,avgap)


*        Any 1x1 block has been treated before


*        loop while( OLDNCLS.GT.0 )

*        generate the next representation tree level for the current block

         IF( oldncl.GT.0 ) THEN

*           This is a crude protection against infinitely deep trees

            IF( ndepth.GT.m ) THEN

               info = -2

               RETURN

            ENDIF

*           breadth first processing of the current level of the representation

*           tree: OLDNCL = number of clusters on current level

*           NCLUS is the number of clusters for the next level of the reptree

*           reset NCLUS to count the number of child clusters

            nclus = 0

*

            log_in = log(real(in))

*

            rltl30 = min( 1.0e-2, one / real( in ) )

*

            IF( parity.EQ.0 ) THEN

               oldcls = iindc1+ibegin-1

               newcls = iindc2+ibegin-1

            ELSE

               oldcls = iindc2+ibegin-1

               newcls = iindc1+ibegin-1

            END IF

*           Process the clusters on the current level

            DO 150 i = 1, oldncl

               j = oldcls + 2*i

*              OLDFST, OLDLST = first, last index of current cluster.

*                               cluster indices start with 1 and are relative

*                               to WBEGIN when accessing W, WGAP, WERR, Z

               oldfst = iwork( j-1 )

               oldlst = iwork( j )

               IF( ndepth.GT.0 ) THEN

*                 Retrieve relatively robust representation (RRR) of cluster

*                 that has been computed at the previous level

*                 The RRR is stored in Z and overwritten once the eigenvectors

*                 have been computed or when the cluster is refined


                  IF((dol.EQ.1).AND.(dou.EQ.m)) THEN

*                    Get representation from location of the leftmost evalue

*                    of the cluster

                     j = wbegin + oldfst - 1

                  ELSE

                     IF(wbegin+oldfst-1.LT.dol) THEN

*                       Get representation from the left end of Z array

                        j = dol - 1

                     ELSEIF(wbegin+oldfst-1.GT.dou) THEN

*                       Get representation from the right end of Z array

                        j = dou

                     ELSE

                        j = wbegin + oldfst - 1

                     ENDIF

                  ENDIF

                  CALL scopy( in, z( ibegin, (j-zoffset) ),

     $               1, d( ibegin ), 1 )

                  CALL scopy( in-1, z( ibegin, (j+1-zoffset) ),

     $               1, l( ibegin ),1 )

                  sigma = z( iend, (j+1-zoffset) )

*                 Set the corresponding entries in Z to zero

                  CALL slaset( 'Full', in, 2, zero, zero,

     $                         z( ibegin, (j-zoffset) ), ldz )

               END IF


*              Compute DL and DLL of current RRR

               DO 50 j = ibegin, iend-1

                  tmp = d( j )*l( j )

                  work( indld-1+j ) = tmp

                  work( indlld-1+j ) = tmp*l( j )

   50          CONTINUE


               IF( ndepth.GT.0 .AND. delref ) THEN

*                 P and Q are index of the first and last eigenvalue to compute

*                 within the current block

                  p = indexw( wbegin-1+oldfst )

                  q = indexw( wbegin-1+oldlst )

*                 Offset for the arrays WORK, WGAP and WERR, i.e., the P-OFFSET

*                 thru' Q-OFFSET elements of these arrays are to be used.

C                  OFFSET = P-OLDFST

                  offset = indexw( wbegin ) - 1

*                 perform limited bisection (if necessary) to get approximate

*                 eigenvalues to the precision needed.

                  CALL slarrb2( in, d( ibegin ),

     $                         work(indlld+ibegin-1),

     $                         p, q, rtol1, rtol2, offset,

     $                         work(wbegin),wgap(wbegin),werr(wbegin),

     $                         work( indwrk ), iwork( iindwk ),

     $                         pivmin, lgpvmn, lgspdm, in, iinfo )

                  IF( iinfo.NE.0 ) THEN

                     info = -1

                     RETURN

                  ENDIF

*                 We also recompute the extremal gaps. W holds all eigenvalues

*                 of the unshifted matrix and must be used for computation

*                 of WGAP, the entries of WORK might stem from RRRs with

*                 different shifts. The gaps from WBEGIN-1+OLDFST to

*                 WBEGIN-1+OLDLST are correctly computed in SLARRB2.

*                 However, we only allow the gaps to become greater since

*                 this is what should happen when we decrease WERR

                  IF( oldfst.GT.1) THEN

                     wgap( wbegin+oldfst-2 ) =

     $             max(wgap(wbegin+oldfst-2),

     $                 w(wbegin+oldfst-1)-werr(wbegin+oldfst-1)

     $                 - w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) )

                  ENDIF

                  IF( wbegin + oldlst -1 .LT. wend ) THEN

                     wgap( wbegin+oldlst-1 ) =

     $               max(wgap(wbegin+oldlst-1),

     $                   w(wbegin+oldlst)-werr(wbegin+oldlst)

     $                   - w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) )

                  ENDIF

*                 Each time the eigenvalues in WORK get refined, we store

*                 the newly found approximation with all shifts applied in W

                  DO 53 j=oldfst,oldlst

                     w(wbegin+j-1) = work(wbegin+j-1)+sigma

 53               CONTINUE

               ELSEIF( (ndepth.EQ.0) .OR. (.NOT.delref) ) THEN

*                 Some of the eigenvalues might have been computed on

*                 other processors

*                 Recompute gaps for this cluster

*                 (all eigenvalues have the same

*                 representation, i.e. the same shift, so this is easy)

                  DO 54 j = oldfst, oldlst-1

                     myerr = werr(wbegin + j - 1)

                     nxterr = werr(wbegin + j )

                     wgap(wbegin+j-1) = max(wgap(wbegin+j-1),

     $                    (   work(wbegin+j) - nxterr )

     $                  - ( work(wbegin+j-1) + myerr )

     $                                     )

 54               CONTINUE

               END IF

*

*              Process the current node.

*

               newfst = oldfst

               DO 140 j = oldfst, oldlst

                  IF( j.EQ.oldlst ) THEN

*                    we are at the right end of the cluster, this is also the

*                    boundary of the child cluster

                     newlst = j

                  ELSE

                     IF (vrtree.EQ.use30) THEN

                        IF(wgap( wbegin + j -1).GE.

     $                     rltl30 * abs(work(wbegin + j -1)) ) THEN

*                          the right relgap is big enough by the Lapack 3.0 criterion

                           newlst = j

                        ELSE

*                          inside a child cluster, the relative gap is not

*                          big enough.

                           GOTO 140

            ENDIF

                     ELSE IF (vrtree.EQ.use31) THEN

                        IF ( wgap( wbegin + j -1).GE.

     $                      minrgp* abs( work(wbegin + j -1) ) ) THEN

*                          the right relgap is big enough by the Lapack 3.1 criterion

*                          (NEWFST,..,NEWLST) is well separated from the following

                           newlst = j

                        ELSE

*                          inside a child cluster, the relative gap is not

*                          big enough.

                           GOTO 140

            ENDIF

                     ELSE IF (vrtree.EQ.use32a) THEN

                        IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.

     $                      minrgp* abs(work(wbegin+j-1)) ) ) THEN

*                          the right relgap is big enough by the Lapack 3.1 criterion

*                          Found a singleton

                           newlst = j

                        ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.

     $                           ( wgap(wbegin+j-2).GE.

     $                             minrgp* abs(work(wbegin+j-1)) ).AND.

     $                           ( wgap(wbegin+j-1).GE.

     $                             minrgp* abs(work(wbegin+j-1)) )

     $                     ) THEN

*                          Found a singleton

                           newlst = j

                        ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.

     $                     (minrgp*abs(work(wbegin+j-1)) ) )

     $                     THEN

*                          the right relgap is big enough by the Lapack 3.1 criterion

                           newlst = j

                        ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.

     $                     (wgap(wbegin+j-1).GE.enufgp))

     $                     THEN

*                          the right gap is bigger than ENUFGP

*                          Care needs to be taken with this criterion to make

*                          sure it does not create a remaining `false' singleton

                           newlst = j

                        ELSE

*                          inside a child cluster, the relative gap is not

*                          big enough.

                           GOTO 140

            ENDIF

                     ELSE IF (vrtree.EQ.use32b) THEN

                        IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.

     $                      minrgp* abs(work(wbegin+j-1)) ) ) THEN

*                          the right relgap is big enough by the Lapack 3.1 criterion

*                          Found a singleton

                           newlst = j

                        ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.

     $                           ( wgap(wbegin+j-2).GE.

     $                             minrgp* abs(work(wbegin+j-1)) ).AND.

     $                           ( wgap(wbegin+j-1).GE.

     $                             minrgp* abs(work(wbegin+j-1)) )

     $                     ) THEN

*                          Found a singleton

                           newlst = j

                        ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.

     $                     (minrgp*abs(work(wbegin+j-1)) ) )

     $                     THEN

*                          the right relgap is big enough by the Lapack 3.1 criterion

                           newlst = j

                        ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.

     $                     (wgap( wbegin + j -1).GE.

     $                     rltl30 * abs(work(wbegin + j -1)) ))

     $                     THEN

*                          the right relgap is big enough by the Lapack 3.0 criterion

*                          Care needs to be taken with this criterion to make

*                          sure it does not create a remaining `false' singleton

                           newlst = j

                        ELSE

*                          inside a child cluster, the relative gap is not

*                          big enough.

                           GOTO 140

            ENDIF

                     END IF

                  END IF


*                 Compute size of child cluster found

                  newsiz = newlst - newfst + 1

                  maxcls = max( newsiz, maxcls )


*                 NEWFTT is the place in Z where the new RRR or the computed

*                 eigenvector is to be stored

                  IF((dol.EQ.1).AND.(dou.EQ.m)) THEN

*                    Store representation at location of the leftmost evalue

*                    of the cluster

                     newftt = wbegin + newfst - 1

                  ELSE

                     IF(wbegin+newfst-1.LT.dol) THEN

*                       Store representation at the left end of Z array

                        newftt = dol - 1

                     ELSEIF(wbegin+newfst-1.GT.dou) THEN

*                       Store representation at the right end of Z array

                        newftt = dou

                     ELSE

                        newftt = wbegin + newfst - 1

                     ENDIF

                  ENDIF

*                 FOR 1D-DISTRIBUTED Z, COMPUTE NEWFTT SHIFTED BY ZOFFSET

                  newftt = newftt - zoffset


                  IF( newsiz.GT.1) THEN

*

*                    Current child is not a singleton but a cluster.

*

*

                     IF((wbegin+newlst-1.LT.dol).OR.

     $                  (wbegin+newfst-1.GT.dou)) THEN

*                       if the cluster contains no desired eigenvalues

*                       skip the computation of that branch of the rep. tree

                        GOTO 139

                     ENDIF


*                    Compute left and right cluster gap.

*

                     IF( newfst.EQ.1 ) THEN

                        lgap = max( zero,

     $                       w(wbegin)-werr(wbegin) - vl )

                     ELSE

                        lgap = wgap( wbegin+newfst-2 )

                     ENDIF

                     rgap = wgap( wbegin+newlst-1 )

*

*                    For larger clusters, record the largest gap observed

*                    somewhere near the middle of the cluster as a possible

*                    alternative position for a shift when TRYMID is TRUE

*

                     mgap = zero

                     IF(newsiz.GE.50) THEN

                        kk = newfst

                        DO 545 k =newfst+newsiz/3,newlst-newsiz/3

                   IF(mgap.LT.wgap( wbegin+k-1 )) THEN

                      kk = k

                      mgap = wgap( wbegin+k-1 )

                           ENDIF

 545                    CONTINUE

                     ENDIF


*

*                    Record the left- and right-most eigenvalues needed

*                    for the next level of the representation tree

                     needil = min(needil,wbegin+newfst-1)

                     neediu = max(neediu,wbegin+newlst-1)


*

*                    Check if middle gap is large enough to shift there

*

                     gap = min(lgap,rgap)

             trymid = (mgap.GT.gap)


             splace(1) = newfst

             splace(2) = newlst

             IF(trymid) THEN

                splace(3) = kk

                        splace(4) = kk+1

             ELSE

                splace(3) = newfst

                splace(4) = newlst

             ENDIF

*

*                    Compute left- and rightmost eigenvalue of child

*                    to high precision in order to shift as close

*                    as possible and obtain as large relative gaps

*                    as possible

*


                     DO 55 k =1,4

                        p = indexw( wbegin-1+splace(k) )

                        offset = indexw( wbegin ) - 1

                        CALL slarrb2( in, d(ibegin),

     $                       work( indlld+ibegin-1 ),p,p,

     $                       rqtol, rqtol, offset,

     $                       work(wbegin),wgap(wbegin),

     $                       werr(wbegin),work( indwrk ),

     $                       iwork( iindwk ),

     $                       pivmin, lgpvmn, lgspdm, in, iinfo )

 55                  CONTINUE

*

*                    Compute RRR of child cluster.

*                    Note that the new RRR is stored in Z

*

C                    SLARRF2 needs LWORK = 2*N

                     CALL slarrf2( in, d( ibegin ), l( ibegin ),

     $                         work(indld+ibegin-1),

     $                         splace(1), splace(2),

     $                         splace(3), splace(4), work(wbegin),

     $                         wgap(wbegin), werr(wbegin), trymid,

     $                         spdiam, lgap, rgap, pivmin, tau,

     $                         z( ibegin, newftt ),

     $                         z( ibegin, newftt+1 ),

     $                         work( indwrk ), iinfo )

                     IF( iinfo.EQ.0 ) THEN

*                       a new RRR for the cluster was found by SLARRF2

*                       update shift and store it

                        ssigma = sigma + tau

                        z( iend, newftt+1 ) = ssigma

*                       WORK() are the midpoints and WERR() the semi-width

*                       Note that the entries in W are unchanged.

                        DO 116 k = newfst, newlst

                           fudge =

     $                          three*eps*abs(work(wbegin+k-1))

                           work( wbegin + k - 1 ) =

     $                          work( wbegin + k - 1) - tau

                           fudge = fudge +

     $                          four*eps*abs(work(wbegin+k-1))

*                          Fudge errors

                           werr( wbegin + k - 1 ) =

     $                          werr( wbegin + k - 1 ) + fudge

 116                    CONTINUE


                        nclus = nclus + 1

                        k = newcls + 2*nclus

                        iwork( k-1 ) = newfst

                        iwork( k ) = newlst

*

                        IF(.NOT.delref) THEN

                           onlylc = .true.

*

                           IF(onlylc) THEN

                              mywfst = max(wbegin-1+newfst,dol-1)

                              mywlst = min(wbegin-1+newlst,dou+1)

                           ELSE

                              mywfst = wbegin-1+newfst

                              mywlst = wbegin-1+newlst

                           ENDIF


*                          Compute LLD of new RRR

                           DO 5000 k = ibegin, iend-1

                              work( indwrk-1+k ) =

     $                        z(k,newftt)*

     $                       (z(k,newftt+1)**2)

 5000                      CONTINUE

*                          P and Q are the indices of the first and last

*                          eigenvalue to compute within the new cluster

                           p = indexw( mywfst )

                           q = indexw( mywlst )

*                          Offset for the arrays WORK, WGAP and WERR

                           offset = indexw( wbegin ) - 1

*                          perform limited bisection (if necessary) to get approximate

*                          eigenvalues to the precision needed.

                           CALL slarrb2( in,

     $                         z(ibegin, newftt ),

     $                         work(indwrk+ibegin-1),

     $                         p, q, rtol1, rtol2, offset,

     $                         work(wbegin),wgap(wbegin),werr(wbegin),

     $                         work( indwrk+n ), iwork( iindwk ),

     $                         pivmin, lgpvmn, lgspdm, in, iinfo )

                           IF( iinfo.NE.0 ) THEN

                              info = -1

                              RETURN

                           ENDIF

*                          Each time the eigenvalues in WORK get refined, we store

*                          the newly found approximation with all shifts applied in W

                           DO 5003 k=newfst,newlst

                              w(wbegin+k-1) = work(wbegin+k-1)+ssigma

 5003                      CONTINUE

                        ENDIF

*

                     ELSE

                        info = -2

                        RETURN

                     ENDIF

              ELSE

*

*                    Compute eigenvector of singleton

*

                     iter = 0

*

                     tol = four * log_in * eps

*

                     k = newfst

                     windex = wbegin + k - 1

                     zindex = windex - zoffset

                     windmn = max(windex - 1,1)

                     windpl = min(windex + 1,m)

                     lambda = work( windex )

*                    Check if eigenvector computation is to be skipped

                     IF((windex.LT.dol).OR.

     $                  (windex.GT.dou)) THEN

                        eskip = .true.

                        GOTO 125

                     ELSE

                        eskip = .false.

                     ENDIF

                     left = work( windex ) - werr( windex )

                     right = work( windex ) + werr( windex )

                     indeig = indexw( windex )

                     IF( k .EQ. 1) THEN

                        lgap = eps*max(abs(left),abs(right))

                     ELSE

                        lgap = wgap(windmn)

                     ENDIF

                     IF( k .EQ. im) THEN

                        rgap = eps*max(abs(left),abs(right))

                     ELSE

                        rgap = wgap(windex)

                     ENDIF

                     gap = min( lgap, rgap )

                     IF(( k .EQ. 1).OR.(k .EQ. im)) THEN

                        gaptol = zero

                     ELSE

                        gaptol = gap * eps

                     ENDIF

                     isupmn = in

                     isupmx = 1

*                    Update WGAP so that it holds the minimum gap

*                    to the left or the right. This is crucial in the

*                    case where bisection is used to ensure that the

*                    eigenvalue is refined up to the required precision.

*                    The correct value is restored afterwards.

                     savegp = wgap(windex)

                     wgap(windex) = gap

*                    We want to use the Rayleigh Quotient Correction

*                    as often as possible since it converges quadratically

*                    when we are close enough to the desired eigenvalue.

*                    However, the Rayleigh Quotient can have the wrong sign

*                    and lead us away from the desired eigenvalue. In this

*                    case, the best we can do is to use bisection.

                     usedbs = .false.

                     usedrq = .false.

*                    Bisection is initially turned off unless it is forced

                     needbs =  .NOT.tryrqc

*                    Reset ITWIST

                     itwist = 0

 120                 CONTINUE

*                    Check if bisection should be used to refine eigenvalue

                     IF(needbs) THEN

*                       Take the bisection as new iterate

                        usedbs = .true.

*                       Temporary copy of twist index needed

                        itmp1 = itwist

                        offset = indexw( wbegin ) - 1

                        CALL slarrb2( in, d(ibegin),

     $                       work(indlld+ibegin-1),indeig,indeig,

     $                       zero, two*eps, offset,

     $                       work(wbegin),wgap(wbegin),

     $                       werr(wbegin),work( indwrk ),

     $                       iwork( iindwk ),

     $                       pivmin, lgpvmn, lgspdm, itmp1, iinfo )

                        IF( iinfo.NE.0 ) THEN

                           info = -3

                           RETURN

                        ENDIF

                        lambda = work( windex )

*                       Reset twist index from inaccurate LAMBDA to

*                       force computation of true MINGMA

                        itwist = 0

                     ENDIF

*                    Given LAMBDA, compute the eigenvector.

                     CALL slar1va( in, 1, in, lambda, d(ibegin),

     $                    l( ibegin ), work(indld+ibegin-1),

     $                    work(indlld+ibegin-1),

     $                    pivmin, gaptol, z( ibegin, zindex),

     $                    .NOT.usedbs, negcnt, ztz, mingma,

     $                    itwist, isuppz( 2*windex-1 ),

     $                    nrminv, resid, rqcorr, work( indwrk ) )

                     IF(iter .EQ. 0) THEN

                        bstres = resid

                        bstw = lambda

                     ELSEIF(resid.LT.bstres) THEN

                        bstres = resid

                        bstw = lambda

                     ENDIF

                     isupmn = min(isupmn,isuppz( 2*windex-1 ))

                     isupmx = max(isupmx,isuppz( 2*windex ))

                     iter = iter + 1

*

*                    Convergence test for Rayleigh-Quotient iteration

*                    (omitted when Bisection has been used)

*

                     IF( resid.GT.tol*gap .AND. abs( rqcorr ).GT.

     $                    rqtol*abs( lambda ) .AND. .NOT. usedbs)

     $                    THEN

*                       We need to check that the RQCORR update doesn't

*                       move the eigenvalue away from the desired one and

*                       towards a neighbor. -> protection with bisection

                        IF(indeig.LE.negcnt) THEN

*                          The wanted eigenvalue lies to the left

                           sgndef = -one

                        ELSE

*                          The wanted eigenvalue lies to the right

                           sgndef = one

                        ENDIF

*                       We only use the RQCORR if it improves the

*                       iterate reasonably.

                        IF( ( rqcorr*sgndef.GE.zero )

     $                       .AND.( lambda + rqcorr.LE. right)

     $                       .AND.( lambda + rqcorr.GE. left)

     $                       ) THEN

                           usedrq = .true.

*                          Store new midpoint of bisection interval in WORK

                           IF(sgndef.EQ.one) THEN

*                             The current LAMBDA is on the left of the true

*                             eigenvalue

                              left = lambda

                           ELSE

*                             The current LAMBDA is on the right of the true

*                             eigenvalue

                              right = lambda

                           ENDIF

                           work( windex ) =

     $                       half * (right + left)

*                          Take RQCORR since it has the correct sign and

*                          improves the iterate reasonably

                           lambda = lambda + rqcorr

*                          Update width of error interval

                           werr( windex ) =

     $                             half * (right-left)

                        ELSE

                           needbs = .true.

                        ENDIF

                        IF(right-left.LT.rqtol*abs(lambda)) THEN

*                             The eigenvalue is computed to bisection accuracy

*                             compute eigenvector and stop

                           usedbs = .true.

                           GOTO 120

                        ELSEIF( iter.LT.maxitr ) THEN

                           GOTO 120

                        ELSEIF( iter.EQ.maxitr ) THEN

                           needbs = .true.

                           GOTO 120

                        ELSE

                           info = 5

                           RETURN

                        END IF

                     ELSE

                        stp2ii = .false.

                        IF(usedrq .AND. usedbs .AND.

     $                     bstres.LE.resid) THEN

                           lambda = bstw

                           stp2ii = .true.

                        ENDIF

                        IF (stp2ii) THEN

                           CALL slar1va( in, 1, in, lambda,

     $                          d( ibegin ), l( ibegin ),

     $                          work(indld+ibegin-1),

     $                          work(indlld+ibegin-1),

     $                          pivmin, gaptol,

     $                          z( ibegin, zindex ),

     $                          .NOT.usedbs, negcnt, ztz, mingma,

     $                          itwist,

     $                          isuppz( 2*windex-1 ),

     $                          nrminv, resid, rqcorr, work( indwrk ) )

                        ENDIF

                        work( windex ) = lambda

                     END IF

*

*                    Compute FP-vector support w.r.t. whole matrix

*

                     isuppz( 2*windex-1 ) = isuppz( 2*windex-1 )+oldien

                     isuppz( 2*windex ) = isuppz( 2*windex )+oldien

                     zfrom = isuppz( 2*windex-1 )

                     zto = isuppz( 2*windex )

                     isupmn = isupmn + oldien

                     isupmx = isupmx + oldien

*                    Ensure vector is ok if support in the RQI has changed

                     IF(isupmn.LT.zfrom) THEN

                        DO 122 ii = isupmn,zfrom-1

                           z( ii, zindex ) = zero

 122                    CONTINUE

                     ENDIF

                     IF(isupmx.GT.zto) THEN

                        DO 123 ii = zto+1,isupmx

                           z( ii, zindex ) = zero

 123                    CONTINUE

                     ENDIF

                     CALL sscal( zto-zfrom+1, nrminv,

     $                       z( zfrom, zindex ), 1 )

 125                 CONTINUE

*                    Update W

                     w( windex ) = lambda+sigma

*                    Recompute the gaps on the left and right

*                    But only allow them to become larger and not

*                    smaller (which can only happen through "bad"

*                    cancellation and doesn't reflect the theory

*                    where the initial gaps are underestimated due

*                    to WERR being too crude.)

                     IF(.NOT.eskip) THEN

                        IF( k.GT.1) THEN

                           wgap( windmn ) = max( wgap(windmn),

     $                          w(windex)-werr(windex)

     $                          - w(windmn)-werr(windmn) )

                        ENDIF

                        IF( windex.LT.wend ) THEN

                           wgap( windex ) = max( savegp,

     $                          w( windpl )-werr( windpl )

     $                          - w( windex )-werr( windex) )

                        ENDIF

                     ENDIF

                  ENDIF

*                 here ends the code for the current child

*

 139              CONTINUE

*                 Proceed to any remaining child nodes

                  newfst = j + 1

 140           CONTINUE

 150        CONTINUE

*           Store number of clusters

            iwork( iincls + jblk ) = nclus

*

         END IF

         ibegin = iend + 1

         wbegin = wend + 1

 170  CONTINUE

*

*     Check if everything is done: no clusters left for

*     this processor in any block

*

      finish = .true.

      DO 180 jblk = 1, iblock( m )

         finish = finish .AND. (iwork(iincls + jblk).EQ.0)

 180  CONTINUE


      IF(.NOT.finish) THEN

         ndepth = ndepth + 1

         IF((needil.GE.dol).AND.(neediu.LE.dou)) THEN

*           Once this processor's part of the

*           representation tree consists exclusively of eigenvalues

*           between DOL and DOU, it can work independently from all

*           others.

            GOTO 40

         ENDIF

      ENDIF

*


      RETURN

*

*     End of SLARRV2

*

      END