ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
slarrv2.f
Go to the documentation of this file.
1  SUBROUTINE slarrv2( N, VL, VU, D, L, PIVMIN,
2  $ ISPLIT, M, DOL, DOU, NEEDIL, NEEDIU,
3  $ MINRGP, RTOL1, RTOL2, W, WERR, WGAP,
4  $ IBLOCK, INDEXW, GERS, SDIAM,
5  $ Z, LDZ, ISUPPZ,
6  $ WORK, IWORK, VSTART, FINISH,
7  $ MAXCLS, NDEPTH, PARITY, ZOFFSET, INFO )
8 
9 * -- ScaLAPACK auxiliary routine (version 2.0) --
10 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
11 * July 4, 2010
12 *
13  IMPLICIT NONE
14 *
15 * .. Scalar Arguments ..
16  INTEGER DOL, DOU, INFO, LDZ, M, N, MAXCLS,
17  $ NDEPTH, NEEDIL, NEEDIU, PARITY, ZOFFSET
18  REAL MINRGP, PIVMIN, RTOL1, RTOL2, VL, VU
19  LOGICAL VSTART, FINISH
20 * ..
21 * .. Array Arguments ..
22  INTEGER IBLOCK( * ), INDEXW( * ), ISPLIT( * ),
23  $ ISUPPZ( * ), IWORK( * )
24  REAL D( * ), GERS( * ), L( * ), SDIAM( * ),
25  $ w( * ), werr( * ),
26  $ wgap( * ), work( * )
27  REAL Z( LDZ, * )
28 *
29 * Purpose
30 * =======
31 *
32 * SLARRV2 computes the eigenvectors of the tridiagonal matrix
33 * T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T.
34 * The input eigenvalues should have been computed by SLARRE2A
35 * or by precious calls to SLARRV2.
36 *
37 * The major difference between the parallel and the sequential construction
38 * of the representation tree is that in the parallel case, not all eigenvalues
39 * of a given cluster might be computed locally. Other processors might "own"
40 * and refine part of an eigenvalue cluster. This is crucial for scalability.
41 * Thus there might be communication necessary before the current level of the
42 * representation tree can be parsed.
43 *
44 * Please note:
45 * 1. The calling sequence has two additional INTEGER parameters,
46 * DOL and DOU, that should satisfy M>=DOU>=DOL>=1.
47 * These parameters are only relevant for the case JOBZ = 'V'.
48 * SLARRV2 ONLY computes the eigenVECTORS
49 * corresponding to eigenvalues DOL through DOU in W. (That is,
50 * instead of computing the eigenvectors belonging to W(1)
51 * through W(M), only the eigenvectors belonging to eigenvalues
52 * W(DOL) through W(DOU) are computed. In this case, only the
53 * eigenvalues DOL:DOU are guaranteed to be accurately refined
54 * to all figures by Rayleigh-Quotient iteration.
55 *
56 * 2. The additional arguments VSTART, FINISH, NDEPTH, PARITY, ZOFFSET
57 * are included as a thread-safe implementation equivalent to SAVE variables.
58 * These variables store details about the local representation tree which is
59 * computed layerwise. For scalability reasons, eigenvalues belonging to the
60 * locally relevant representation tree might be computed on other processors.
61 * These need to be communicated before the inspection of the RRRs can proceed
62 * on any given layer.
63 * Note that only when the variable FINISH is true, the computation has ended
64 * All eigenpairs between DOL and DOU have been computed. M is set = DOU - DOL + 1.
65 *
66 * 3. SLARRV2 needs more workspace in Z than the sequential SLARRV.
67 * It is used to store the conformal embedding of the local representation tree.
68 *
69 * Arguments
70 * =========
71 *
72 * N (input) INTEGER
73 * The order of the matrix. N >= 0.
74 *
75 * VL (input) REAL
76 * VU (input) REAL
77 * Lower and upper bounds of the interval that contains the desired
78 * eigenvalues. VL < VU. Needed to compute gaps on the left or right
79 * end of the extremal eigenvalues in the desired RANGE.
80 * VU is currently not used but kept as parameter in case needed.
81 *
82 * D (input/output) REAL array, dimension (N)
83 * On entry, the N diagonal elements of the diagonal matrix D.
84 * On exit, D is overwritten.
85 *
86 * L (input/output) REAL array, dimension (N)
87 * On entry, the (N-1) subdiagonal elements of the unit
88 * bidiagonal matrix L are in elements 1 to N-1 of L
89 * (if the matrix is not splitted.) At the end of each block
90 * is stored the corresponding shift as given by SLARRE.
91 * On exit, L is overwritten.
92 *
93 * PIVMIN (in) DOUBLE PRECISION
94 * The minimum pivot allowed in the sturm sequence.
95 *
96 * ISPLIT (input) INTEGER array, dimension (N)
97 * The splitting points, at which T breaks up into blocks.
98 * The first block consists of rows/columns 1 to
99 * ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
100 * through ISPLIT( 2 ), etc.
101 *
102 * M (input) INTEGER
103 * The total number of input eigenvalues. 0 <= M <= N.
104 *
105 * DOL (input) INTEGER
106 * DOU (input) INTEGER
107 * If the user wants to compute only selected eigenvectors from all
108 * the eigenvalues supplied, he can specify an index range DOL:DOU.
109 * Or else the setting DOL=1, DOU=M should be applied.
110 * Note that DOL and DOU refer to the order in which the eigenvalues
111 * are stored in W.
112 * If the user wants to compute only selected eigenpairs, then
113 * the columns DOL-1 to DOU+1 of the eigenvector space Z contain the
114 * computed eigenvectors. All other columns of Z are set to zero.
115 * If DOL > 1, then Z(:,DOL-1-ZOFFSET) is used.
116 * If DOU < M, then Z(:,DOU+1-ZOFFSET) is used.
117 *
118 *
119 * NEEDIL (input/output) INTEGER
120 * NEEDIU (input/output) INTEGER
121 * Describe which are the left and right outermost eigenvalues
122 * that still need to be included in the computation. These indices
123 * indicate whether eigenvalues from other processors are needed to
124 * correctly compute the conformally embedded representation tree.
125 * When DOL<=NEEDIL<=NEEDIU<=DOU, all required eigenvalues are local
126 * to the processor and no communication is required to compute its
127 * part of the representation tree.
128 *
129 * MINRGP (input) REAL
130 * The minimum relativ gap threshold to decide whether an eigenvalue
131 * or a cluster boundary is reached.
132 *
133 * RTOL1 (input) REAL
134 * RTOL2 (input) REAL
135 * Parameters for bisection.
136 * An interval [LEFT,RIGHT] has converged if
137 * RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) )
138 *
139 * W (input/output) REAL array, dimension (N)
140 * The first M elements of W contain the APPROXIMATE eigenvalues for
141 * which eigenvectors are to be computed. The eigenvalues
142 * should be grouped by split-off block and ordered from
143 * smallest to largest within the block. (The output array
144 * W from SSTEGR2A is expected here.) Furthermore, they are with
145 * respect to the shift of the corresponding root representation
146 * for their block. On exit,
147 * W holds those UNshifted eigenvalues
148 * for which eigenvectors have already been computed.
149 *
150 * WERR (input/output) REAL array, dimension (N)
151 * The first M elements contain the semiwidth of the uncertainty
152 * interval of the corresponding eigenvalue in W
153 *
154 * WGAP (input/output) REAL array, dimension (N)
155 * The separation from the right neighbor eigenvalue in W.
156 *
157 * IBLOCK (input) INTEGER array, dimension (N)
158 * The indices of the blocks (submatrices) associated with the
159 * corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue
160 * W(i) belongs to the first block from the top, =2 if W(i)
161 * belongs to the second block, etc.
162 *
163 * INDEXW (input) INTEGER array, dimension (N)
164 * The indices of the eigenvalues within each block (submatrix);
165 * for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the
166 * i-th eigenvalue W(i) is the 10-th eigenvalue in the second block.
167 *
168 * GERS (input) REAL array, dimension (2*N)
169 * The N Gerschgorin intervals (the i-th Gerschgorin interval
170 * is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should
171 * be computed from the original UNshifted matrix.
172 * Currently NOT used but kept as parameter in case it becomes
173 * needed in the future.
174 *
175 * SDIAM (input) REAL array, dimension (N)
176 * The spectral diameters for all unreduced blocks.
177 *
178 * Z (output) REAL array, dimension (LDZ, max(1,M) )
179 * If INFO = 0, the first M columns of Z contain the
180 * orthonormal eigenvectors of the matrix T
181 * corresponding to the input eigenvalues, with the i-th
182 * column of Z holding the eigenvector associated with W(i).
183 * In the distributed version, only a subset of columns
184 * is accessed, see DOL,DOU and ZOFFSET.
185 *
186 * LDZ (input) INTEGER
187 * The leading dimension of the array Z. LDZ >= 1, and if
188 * JOBZ = 'V', LDZ >= max(1,N).
189 *
190 * ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) )
191 * The support of the eigenvectors in Z, i.e., the indices
192 * indicating the nonzero elements in Z. The I-th eigenvector
193 * is nonzero only in elements ISUPPZ( 2*I-1 ) through
194 * ISUPPZ( 2*I ).
195 *
196 * WORK (workspace) REAL array, dimension (12*N)
197 *
198 * IWORK (workspace) INTEGER array, dimension (7*N)
199 *
200 * VSTART (input/output) LOGICAL
201 * .TRUE. on initialization, set to .FALSE. afterwards.
202 *
203 * FINISH (input/output) LOGICAL
204 * A flag that indicates whether all eigenpairs have been computed.
205 *
206 * MAXCLS (input/output) INTEGER
207 * The largest cluster worked on by this processor in the
208 * representation tree.
209 *
210 * NDEPTH (input/output) INTEGER
211 * The current depth of the representation tree. Set to
212 * zero on initial pass, changed when the deeper levels of
213 * the representation tree are generated.
214 *
215 * PARITY (input/output) INTEGER
216 * An internal parameter needed for the storage of the
217 * clusters on the current level of the representation tree.
218 *
219 * ZOFFSET (input) INTEGER
220 * Offset for storing the eigenpairs when Z is distributed
221 * in 1D-cyclic fashion.
222 *
223 * INFO (output) INTEGER
224 * = 0: successful exit
225 *
226 * > 0: A problem occured in SLARRV2.
227 * < 0: One of the called subroutines signaled an internal problem.
228 * Needs inspection of the corresponding parameter IINFO
229 * for further information.
230 *
231 * =-1: Problem in SLARRB2 when refining a child's eigenvalues.
232 * =-2: Problem in SLARRF2 when computing the RRR of a child.
233 * When a child is inside a tight cluster, it can be difficult
234 * to find an RRR. A partial remedy from the user's point of
235 * view is to make the parameter MINRGP smaller and recompile.
236 * However, as the orthogonality of the computed vectors is
237 * proportional to 1/MINRGP, the user should be aware that
238 * he might be trading in precision when he decreases MINRGP.
239 * =-3: Problem in SLARRB2 when refining a single eigenvalue
240 * after the Rayleigh correction was rejected.
241 * = 5: The Rayleigh Quotient Iteration failed to converge to
242 * full accuracy in MAXITR steps.
243 *
244 * =====================================================================
245 *
246 * .. Parameters ..
247  INTEGER MAXITR, USE30, USE31, USE32A, USE32B
248  PARAMETER ( MAXITR = 10, use30=30, use31=31,
249  $ use32a=3210, use32b = 3211 )
250  REAL ZERO, ONE, TWO, THREE, FOUR, HALF
251  PARAMETER ( ZERO = 0.0e0, one = 1.0e0,
252  $ two = 2.0e0, three = 3.0e0,
253  $ four = 4.0e0, half = 0.5e0)
254 * ..
255 * .. Local Arrays ..
256  INTEGER SPLACE( 4 )
257 * ..
258 * .. Local Scalars ..
259  LOGICAL DELREF, ESKIP, NEEDBS, ONLYLC, STP2II, TRYMID,
260  $ TRYRQC, USEDBS, USEDRQ
261  INTEGER I, IBEGIN, IEND, II, IINCLS, IINDC1, IINDC2,
262  $ iindwk, iinfo, im, in, indeig, indld, indlld,
263  $ indwrk, isupmn, isupmx, iter, itmp1, itwist, j,
264  $ jblk, k, kk, miniwsize, minwsize, mywfst,
265  $ mywlst, nclus, negcnt, newcls, newfst, newftt,
266  $ newlst, newsiz, offset, oldcls, oldfst, oldien,
267  $ oldlst, oldncl, p, q, vrtree, wbegin, wend,
268  $ windex, windmn, windpl, zfrom, zindex, zto,
269  $ zusedl, zusedu, zusedw
270  REAL AVGAP, BSTRES, BSTW, ENUFGP, EPS, FUDGE, GAP,
271  $ GAPTOL, LAMBDA, LEFT, LGAP, LGPVMN, LGSPDM,
272  $ LOG_IN, MGAP, MINGMA, MYERR, NRMINV, NXTERR,
273  $ ORTOL, RESID, RGAP, RIGHT, RLTL30, RQCORR,
274  $ RQTOL, SAVEGP, SGNDEF, SIGMA, SPDIAM, SSIGMA,
275  $ TAU, TMP, TOL, ZTZ
276 * ..
277 * .. External Functions ..
278  REAL SLAMCH
279  REAL SDOT, SNRM2
280  EXTERNAL SDOT, SLAMCH, SNRM2
281 * ..
282 * .. External Subroutines ..
283  EXTERNAL SAXPY, SCOPY, SLAR1VA, SLARRB2,
284  $ slarrf2, slaset, sscal
285 * ..
286 * .. Intrinsic Functions ..
287  INTRINSIC abs, real, max, min, sqrt
288 * ..
289 * .. Executable Statements ..
290 * ..
291 
292 
293  info = 0
294 * The first N entries of WORK are reserved for the eigenvalues
295  indld = n+1
296  indlld= 2*n+1
297  indwrk= 3*n+1
298  minwsize = 12 * n
299 
300 * IWORK(IINCLS+JBLK) holds the number of clusters on the current level
301 * of the reptree for block JBLK
302  iincls = 0
303 * IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current
304 * layer and the one above.
305  iindc1 = n
306  iindc2 = 2*n
307  iindwk = 3*n + 1
308  miniwsize = 7 * n
309 
310  eps = slamch( 'Precision' )
311  rqtol = two * eps
312 
313  tryrqc = .true.
314 * Decide which representation tree criterion to use
315 * USE30 = Lapack 3.0 criterion
316 * USE31 = LAPACK 3.1 criterion
317 * USE32A = two criteria, determines singletons with USE31, and groups with avgap.
318 * USE32B = two criteria, determines singletons with USE31, and groups with USE30.
319  vrtree = use32a
320 *
321  lgpvmn = log( pivmin )
322 
323 
324  IF(vstart) THEN
325 *
326 * PREPROCESSING, DONE ONLY IN THE FIRST CALL
327 *
328  vstart = .false.
329 *
330  maxcls = 1
331 
332 * Set delayed eigenvalue refinement
333 * In order to enable more parallelism, refinement
334 * must be done immediately and cannot be delayed until
335 * the next representation tree level.
336  delref = .false.
337 
338  DO 1 i= 1,minwsize
339  work( i ) = zero
340  1 CONTINUE
341 
342  DO 2 i= 1,miniwsize
343  iwork( i ) = 0
344  2 CONTINUE
345 
346  zusedl = 1
347  IF(dol.GT.1) THEN
348 * Set lower bound for use of Z
349  zusedl = dol-1
350  ENDIF
351  zusedu = m
352  IF(dou.LT.m) THEN
353 * Set lower bound for use of Z
354  zusedu = dou+1
355  ENDIF
356 * The width of the part of Z that is used
357  zusedw = zusedu - zusedl + 1
358 *
359  CALL slaset( 'Full', n, zusedw, zero, zero,
360  $ z(1,(zusedl-zoffset)), ldz )
361 
362 * Initialize NDEPTH, the current depth of the representation tree
363  ndepth = 0
364 * Initialize parity
365  parity = 1
366 
367 * Go through blocks, initialize data structures
368  ibegin = 1
369  wbegin = 1
370  DO 10 jblk = 1, iblock( m )
371  iend = isplit( jblk )
372  sigma = l( iend )
373  wend = wbegin - 1
374  3 CONTINUE
375  IF( wend.LT.m ) THEN
376  IF( iblock( wend+1 ).EQ.jblk ) THEN
377  wend = wend + 1
378  GO TO 3
379  END IF
380  END IF
381  IF( wend.LT.wbegin ) THEN
382  iwork( iincls + jblk ) = 0
383  ibegin = iend + 1
384  GO TO 10
385  ELSEIF( (wend.LT.dol).OR.(wbegin.GT.dou) ) THEN
386  iwork( iincls + jblk ) = 0
387  ibegin = iend + 1
388  wbegin = wend + 1
389  GO TO 10
390  END IF
391 * The number of eigenvalues in the current block
392  im = wend - wbegin + 1
393 * This is for a 1x1 block
394  IF( ibegin.EQ.iend ) THEN
395  iwork( iincls + jblk ) = 0
396  z( ibegin, (wbegin-zoffset) ) = one
397  isuppz( 2*wbegin-1 ) = ibegin
398  isuppz( 2*wbegin ) = ibegin
399  w( wbegin ) = w( wbegin ) + sigma
400  work( wbegin ) = w( wbegin )
401  ibegin = iend + 1
402  wbegin = wbegin + 1
403  GO TO 10
404  END IF
405  CALL scopy( im, w( wbegin ), 1,
406  & work( wbegin ), 1 )
407 * We store in W the eigenvalue approximations w.r.t. the original
408 * matrix T.
409  DO 5 i=1,im
410  w(wbegin+i-1) = w(wbegin+i-1)+sigma
411  5 CONTINUE
412 
413 * Initialize cluster counter for this block
414  iwork( iincls + jblk ) = 1
415  iwork( iindc1+ibegin ) = 1
416  iwork( iindc1+ibegin+1 ) = im
417 *
418  ibegin = iend + 1
419  wbegin = wend + 1
420 10 CONTINUE
421 *
422  ENDIF
423 
424 * Init NEEDIL and NEEDIU
425  needil = dou
426  neediu = dol
427 
428 * Here starts the main loop
429 * Only one pass through the loop is done until no collaboration
430 * with other processors is needed.
431  40 CONTINUE
432 
433  parity = 1 - parity
434 
435 * For each block, build next level of representation tree
436 * if there are still remaining clusters
437  ibegin = 1
438  wbegin = 1
439  DO 170 jblk = 1, iblock( m )
440  iend = isplit( jblk )
441  sigma = l( iend )
442 * Find the eigenvectors of the submatrix indexed IBEGIN
443 * through IEND.
444  IF(m.EQ.n) THEN
445 * all eigenpairs are computed
446  wend = iend
447  ELSE
448 * count how many wanted eigenpairs are in this block
449  wend = wbegin - 1
450  15 CONTINUE
451  IF( wend.LT.m ) THEN
452  IF( iblock( wend+1 ).EQ.jblk ) THEN
453  wend = wend + 1
454  GO TO 15
455  END IF
456  END IF
457  ENDIF
458 
459  oldncl = iwork( iincls + jblk )
460  IF( oldncl.EQ.0 ) THEN
461  ibegin = iend + 1
462  wbegin = wend + 1
463  GO TO 170
464  END IF
465 * OLDIEN is the last index of the previous block
466  oldien = ibegin - 1
467 * Calculate the size of the current block
468  in = iend - ibegin + 1
469 * The number of eigenvalues in the current block
470  im = wend - wbegin + 1
471 
472 * Find local spectral diameter of the block
473  spdiam = sdiam(jblk)
474  lgspdm = log( spdiam + pivmin )
475 * Compute ORTOL parameter, similar to SSTEIN
476  ortol = spdiam*1.0e-3
477 * Compute average gap
478  avgap = spdiam/real(in-1)
479 * Compute the minimum of average gap and ORTOL parameter
480 * This can used as a lower bound for acceptable separation
481 * between eigenvalues
482  enufgp = min(ortol,avgap)
483 
484 * Any 1x1 block has been treated before
485 
486 * loop while( OLDNCLS.GT.0 )
487 * generate the next representation tree level for the current block
488  IF( oldncl.GT.0 ) THEN
489 * This is a crude protection against infinitely deep trees
490  IF( ndepth.GT.m ) THEN
491  info = -2
492  RETURN
493  ENDIF
494 * breadth first processing of the current level of the representation
495 * tree: OLDNCL = number of clusters on current level
496 * NCLUS is the number of clusters for the next level of the reptree
497 * reset NCLUS to count the number of child clusters
498  nclus = 0
499 *
500  log_in = log(real(in))
501 *
502  rltl30 = min( 1.0e-2, one / real( in ) )
503 *
504  IF( parity.EQ.0 ) THEN
505  oldcls = iindc1+ibegin-1
506  newcls = iindc2+ibegin-1
507  ELSE
508  oldcls = iindc2+ibegin-1
509  newcls = iindc1+ibegin-1
510  END IF
511 * Process the clusters on the current level
512  DO 150 i = 1, oldncl
513  j = oldcls + 2*i
514 * OLDFST, OLDLST = first, last index of current cluster.
515 * cluster indices start with 1 and are relative
516 * to WBEGIN when accessing W, WGAP, WERR, Z
517  oldfst = iwork( j-1 )
518  oldlst = iwork( j )
519  IF( ndepth.GT.0 ) THEN
520 * Retrieve relatively robust representation (RRR) of cluster
521 * that has been computed at the previous level
522 * The RRR is stored in Z and overwritten once the eigenvectors
523 * have been computed or when the cluster is refined
524 
525  IF((dol.EQ.1).AND.(dou.EQ.m)) THEN
526 * Get representation from location of the leftmost evalue
527 * of the cluster
528  j = wbegin + oldfst - 1
529  ELSE
530  IF(wbegin+oldfst-1.LT.dol) THEN
531 * Get representation from the left end of Z array
532  j = dol - 1
533  ELSEIF(wbegin+oldfst-1.GT.dou) THEN
534 * Get representation from the right end of Z array
535  j = dou
536  ELSE
537  j = wbegin + oldfst - 1
538  ENDIF
539  ENDIF
540  CALL scopy( in, z( ibegin, (j-zoffset) ),
541  $ 1, d( ibegin ), 1 )
542  CALL scopy( in-1, z( ibegin, (j+1-zoffset) ),
543  $ 1, l( ibegin ),1 )
544  sigma = z( iend, (j+1-zoffset) )
545 * Set the corresponding entries in Z to zero
546  CALL slaset( 'Full', in, 2, zero, zero,
547  $ z( ibegin, (j-zoffset) ), ldz )
548  END IF
549 
550 * Compute DL and DLL of current RRR
551  DO 50 j = ibegin, iend-1
552  tmp = d( j )*l( j )
553  work( indld-1+j ) = tmp
554  work( indlld-1+j ) = tmp*l( j )
555  50 CONTINUE
556 
557  IF( ndepth.GT.0 .AND. delref ) THEN
558 * P and Q are index of the first and last eigenvalue to compute
559 * within the current block
560  p = indexw( wbegin-1+oldfst )
561  q = indexw( wbegin-1+oldlst )
562 * Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET
563 * thru' Q-OFFSET elements of these arrays are to be used.
564 C OFFSET = P-OLDFST
565  offset = indexw( wbegin ) - 1
566 * perform limited bisection (if necessary) to get approximate
567 * eigenvalues to the precision needed.
568  CALL slarrb2( in, d( ibegin ),
569  $ work(indlld+ibegin-1),
570  $ p, q, rtol1, rtol2, offset,
571  $ work(wbegin),wgap(wbegin),werr(wbegin),
572  $ work( indwrk ), iwork( iindwk ),
573  $ pivmin, lgpvmn, lgspdm, in, iinfo )
574  IF( iinfo.NE.0 ) THEN
575  info = -1
576  RETURN
577  ENDIF
578 * We also recompute the extremal gaps. W holds all eigenvalues
579 * of the unshifted matrix and must be used for computation
580 * of WGAP, the entries of WORK might stem from RRRs with
581 * different shifts. The gaps from WBEGIN-1+OLDFST to
582 * WBEGIN-1+OLDLST are correctly computed in SLARRB2.
583 * However, we only allow the gaps to become greater since
584 * this is what should happen when we decrease WERR
585  IF( oldfst.GT.1) THEN
586  wgap( wbegin+oldfst-2 ) =
587  $ max(wgap(wbegin+oldfst-2),
588  $ w(wbegin+oldfst-1)-werr(wbegin+oldfst-1)
589  $ - w(wbegin+oldfst-2)-werr(wbegin+oldfst-2) )
590  ENDIF
591  IF( wbegin + oldlst -1 .LT. wend ) THEN
592  wgap( wbegin+oldlst-1 ) =
593  $ max(wgap(wbegin+oldlst-1),
594  $ w(wbegin+oldlst)-werr(wbegin+oldlst)
595  $ - w(wbegin+oldlst-1)-werr(wbegin+oldlst-1) )
596  ENDIF
597 * Each time the eigenvalues in WORK get refined, we store
598 * the newly found approximation with all shifts applied in W
599  DO 53 j=oldfst,oldlst
600  w(wbegin+j-1) = work(wbegin+j-1)+sigma
601  53 CONTINUE
602  ELSEIF( (ndepth.EQ.0) .OR. (.NOT.delref) ) THEN
603 * Some of the eigenvalues might have been computed on
604 * other processors
605 * Recompute gaps for this cluster
606 * (all eigenvalues have the same
607 * representation, i.e. the same shift, so this is easy)
608  DO 54 j = oldfst, oldlst-1
609  myerr = werr(wbegin + j - 1)
610  nxterr = werr(wbegin + j )
611  wgap(wbegin+j-1) = max(wgap(wbegin+j-1),
612  $ ( work(wbegin+j) - nxterr )
613  $ - ( work(wbegin+j-1) + myerr )
614  $ )
615  54 CONTINUE
616  END IF
617 *
618 * Process the current node.
619 *
620  newfst = oldfst
621  DO 140 j = oldfst, oldlst
622  IF( j.EQ.oldlst ) THEN
623 * we are at the right end of the cluster, this is also the
624 * boundary of the child cluster
625  newlst = j
626  ELSE
627  IF (vrtree.EQ.use30) THEN
628  IF(wgap( wbegin + j -1).GE.
629  $ rltl30 * abs(work(wbegin + j -1)) ) THEN
630 * the right relgap is big enough by the Lapack 3.0 criterion
631  newlst = j
632  ELSE
633 * inside a child cluster, the relative gap is not
634 * big enough.
635  GOTO 140
636  ENDIF
637  ELSE IF (vrtree.EQ.use31) THEN
638  IF ( wgap( wbegin + j -1).GE.
639  $ minrgp* abs( work(wbegin + j -1) ) ) THEN
640 * the right relgap is big enough by the Lapack 3.1 criterion
641 * (NEWFST,..,NEWLST) is well separated from the following
642  newlst = j
643  ELSE
644 * inside a child cluster, the relative gap is not
645 * big enough.
646  GOTO 140
647  ENDIF
648  ELSE IF (vrtree.EQ.use32a) THEN
649  IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.
650  $ minrgp* abs(work(wbegin+j-1)) ) ) THEN
651 * the right relgap is big enough by the Lapack 3.1 criterion
652 * Found a singleton
653  newlst = j
654  ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.
655  $ ( wgap(wbegin+j-2).GE.
656  $ minrgp* abs(work(wbegin+j-1)) ).AND.
657  $ ( wgap(wbegin+j-1).GE.
658  $ minrgp* abs(work(wbegin+j-1)) )
659  $ ) THEN
660 * Found a singleton
661  newlst = j
662  ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.
663  $ (minrgp*abs(work(wbegin+j-1)) ) )
664  $ THEN
665 * the right relgap is big enough by the Lapack 3.1 criterion
666  newlst = j
667  ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.
668  $ (wgap(wbegin+j-1).GE.enufgp))
669  $ THEN
670 * the right gap is bigger than ENUFGP
671 * Care needs to be taken with this criterion to make
672 * sure it does not create a remaining `false' singleton
673  newlst = j
674  ELSE
675 * inside a child cluster, the relative gap is not
676 * big enough.
677  GOTO 140
678  ENDIF
679  ELSE IF (vrtree.EQ.use32b) THEN
680  IF( (j.EQ.oldfst).AND.( wgap(wbegin+j-1).GE.
681  $ minrgp* abs(work(wbegin+j-1)) ) ) THEN
682 * the right relgap is big enough by the Lapack 3.1 criterion
683 * Found a singleton
684  newlst = j
685  ELSE IF( (j.GT.oldfst).AND.(j.EQ.newfst).AND.
686  $ ( wgap(wbegin+j-2).GE.
687  $ minrgp* abs(work(wbegin+j-1)) ).AND.
688  $ ( wgap(wbegin+j-1).GE.
689  $ minrgp* abs(work(wbegin+j-1)) )
690  $ ) THEN
691 * Found a singleton
692  newlst = j
693  ELSE IF( (j.GT.newfst).AND.wgap(wbegin+j-1).GE.
694  $ (minrgp*abs(work(wbegin+j-1)) ) )
695  $ THEN
696 * the right relgap is big enough by the Lapack 3.1 criterion
697  newlst = j
698  ELSE IF((j.GT.newfst).AND.(j+1.LT.oldlst).AND.
699  $ (wgap( wbegin + j -1).GE.
700  $ rltl30 * abs(work(wbegin + j -1)) ))
701  $ THEN
702 * the right relgap is big enough by the Lapack 3.0 criterion
703 * Care needs to be taken with this criterion to make
704 * sure it does not create a remaining `false' singleton
705  newlst = j
706  ELSE
707 * inside a child cluster, the relative gap is not
708 * big enough.
709  GOTO 140
710  ENDIF
711  END IF
712  END IF
713 
714 * Compute size of child cluster found
715  newsiz = newlst - newfst + 1
716  maxcls = max( newsiz, maxcls )
717 
718 * NEWFTT is the place in Z where the new RRR or the computed
719 * eigenvector is to be stored
720  IF((dol.EQ.1).AND.(dou.EQ.m)) THEN
721 * Store representation at location of the leftmost evalue
722 * of the cluster
723  newftt = wbegin + newfst - 1
724  ELSE
725  IF(wbegin+newfst-1.LT.dol) THEN
726 * Store representation at the left end of Z array
727  newftt = dol - 1
728  ELSEIF(wbegin+newfst-1.GT.dou) THEN
729 * Store representation at the right end of Z array
730  newftt = dou
731  ELSE
732  newftt = wbegin + newfst - 1
733  ENDIF
734  ENDIF
735 * FOR 1D-DISTRIBUTED Z, COMPUTE NEWFTT SHIFTED BY ZOFFSET
736  newftt = newftt - zoffset
737 
738  IF( newsiz.GT.1) THEN
739 *
740 * Current child is not a singleton but a cluster.
741 *
742 *
743  IF((wbegin+newlst-1.LT.dol).OR.
744  $ (wbegin+newfst-1.GT.dou)) THEN
745 * if the cluster contains no desired eigenvalues
746 * skip the computation of that branch of the rep. tree
747  GOTO 139
748  ENDIF
749 
750 * Compute left and right cluster gap.
751 *
752  IF( newfst.EQ.1 ) THEN
753  lgap = max( zero,
754  $ w(wbegin)-werr(wbegin) - vl )
755  ELSE
756  lgap = wgap( wbegin+newfst-2 )
757  ENDIF
758  rgap = wgap( wbegin+newlst-1 )
759 *
760 * For larger clusters, record the largest gap observed
761 * somewhere near the middle of the cluster as a possible
762 * alternative position for a shift when TRYMID is TRUE
763 *
764  mgap = zero
765  IF(newsiz.GE.50) THEN
766  kk = newfst
767  DO 545 k =newfst+newsiz/3,newlst-newsiz/3
768  IF(mgap.LT.wgap( wbegin+k-1 )) THEN
769  kk = k
770  mgap = wgap( wbegin+k-1 )
771  ENDIF
772  545 CONTINUE
773  ENDIF
774 
775 *
776 * Record the left- and right-most eigenvalues needed
777 * for the next level of the representation tree
778  needil = min(needil,wbegin+newfst-1)
779  neediu = max(neediu,wbegin+newlst-1)
780 
781 *
782 * Check if middle gap is large enough to shift there
783 *
784  gap = min(lgap,rgap)
785  trymid = (mgap.GT.gap)
786 
787  splace(1) = newfst
788  splace(2) = newlst
789  IF(trymid) THEN
790  splace(3) = kk
791  splace(4) = kk+1
792  ELSE
793  splace(3) = newfst
794  splace(4) = newlst
795  ENDIF
796 *
797 * Compute left- and rightmost eigenvalue of child
798 * to high precision in order to shift as close
799 * as possible and obtain as large relative gaps
800 * as possible
801 *
802 
803  DO 55 k =1,4
804  p = indexw( wbegin-1+splace(k) )
805  offset = indexw( wbegin ) - 1
806  CALL slarrb2( in, d(ibegin),
807  $ work( indlld+ibegin-1 ),p,p,
808  $ rqtol, rqtol, offset,
809  $ work(wbegin),wgap(wbegin),
810  $ werr(wbegin),work( indwrk ),
811  $ iwork( iindwk ),
812  $ pivmin, lgpvmn, lgspdm, in, iinfo )
813  55 CONTINUE
814 *
815 * Compute RRR of child cluster.
816 * Note that the new RRR is stored in Z
817 *
818 C SLARRF2 needs LWORK = 2*N
819  CALL slarrf2( in, d( ibegin ), l( ibegin ),
820  $ work(indld+ibegin-1),
821  $ splace(1), splace(2),
822  $ splace(3), splace(4), work(wbegin),
823  $ wgap(wbegin), werr(wbegin), trymid,
824  $ spdiam, lgap, rgap, pivmin, tau,
825  $ z( ibegin, newftt ),
826  $ z( ibegin, newftt+1 ),
827  $ work( indwrk ), iinfo )
828  IF( iinfo.EQ.0 ) THEN
829 * a new RRR for the cluster was found by SLARRF2
830 * update shift and store it
831  ssigma = sigma + tau
832  z( iend, newftt+1 ) = ssigma
833 * WORK() are the midpoints and WERR() the semi-width
834 * Note that the entries in W are unchanged.
835  DO 116 k = newfst, newlst
836  fudge =
837  $ three*eps*abs(work(wbegin+k-1))
838  work( wbegin + k - 1 ) =
839  $ work( wbegin + k - 1) - tau
840  fudge = fudge +
841  $ four*eps*abs(work(wbegin+k-1))
842 * Fudge errors
843  werr( wbegin + k - 1 ) =
844  $ werr( wbegin + k - 1 ) + fudge
845  116 CONTINUE
846 
847  nclus = nclus + 1
848  k = newcls + 2*nclus
849  iwork( k-1 ) = newfst
850  iwork( k ) = newlst
851 *
852  IF(.NOT.delref) THEN
853  onlylc = .true.
854 *
855  IF(onlylc) THEN
856  mywfst = max(wbegin-1+newfst,dol-1)
857  mywlst = min(wbegin-1+newlst,dou+1)
858  ELSE
859  mywfst = wbegin-1+newfst
860  mywlst = wbegin-1+newlst
861  ENDIF
862 
863 * Compute LLD of new RRR
864  DO 5000 k = ibegin, iend-1
865  work( indwrk-1+k ) =
866  $ z(k,newftt)*
867  $ (z(k,newftt+1)**2)
868  5000 CONTINUE
869 * P and Q are index of the first and last
870 * eigenvalue to compute within the new cluster
871  p = indexw( mywfst )
872  q = indexw( mywlst )
873 * Offset for the arrays WORK, WGAP and WERR
874  offset = indexw( wbegin ) - 1
875 * perform limited bisection (if necessary) to get approximate
876 * eigenvalues to the precision needed.
877  CALL slarrb2( in,
878  $ z(ibegin, newftt ),
879  $ work(indwrk+ibegin-1),
880  $ p, q, rtol1, rtol2, offset,
881  $ work(wbegin),wgap(wbegin),werr(wbegin),
882  $ work( indwrk+n ), iwork( iindwk ),
883  $ pivmin, lgpvmn, lgspdm, in, iinfo )
884  IF( iinfo.NE.0 ) THEN
885  info = -1
886  RETURN
887  ENDIF
888 * Each time the eigenvalues in WORK get refined, we store
889 * the newly found approximation with all shifts applied in W
890  DO 5003 k=newfst,newlst
891  w(wbegin+k-1) = work(wbegin+k-1)+ssigma
892  5003 CONTINUE
893  ENDIF
894 *
895  ELSE
896  info = -2
897  RETURN
898  ENDIF
899  ELSE
900 *
901 * Compute eigenvector of singleton
902 *
903  iter = 0
904 *
905  tol = four * log_in * eps
906 *
907  k = newfst
908  windex = wbegin + k - 1
909  zindex = windex - zoffset
910  windmn = max(windex - 1,1)
911  windpl = min(windex + 1,m)
912  lambda = work( windex )
913 * Check if eigenvector computation is to be skipped
914  IF((windex.LT.dol).OR.
915  $ (windex.GT.dou)) THEN
916  eskip = .true.
917  GOTO 125
918  ELSE
919  eskip = .false.
920  ENDIF
921  left = work( windex ) - werr( windex )
922  right = work( windex ) + werr( windex )
923  indeig = indexw( windex )
924  IF( k .EQ. 1) THEN
925  lgap = eps*max(abs(left),abs(right))
926  ELSE
927  lgap = wgap(windmn)
928  ENDIF
929  IF( k .EQ. im) THEN
930  rgap = eps*max(abs(left),abs(right))
931  ELSE
932  rgap = wgap(windex)
933  ENDIF
934  gap = min( lgap, rgap )
935  IF(( k .EQ. 1).OR.(k .EQ. im)) THEN
936  gaptol = zero
937  ELSE
938  gaptol = gap * eps
939  ENDIF
940  isupmn = in
941  isupmx = 1
942 * Update WGAP so that it holds the minimum gap
943 * to the left or the right. This is crucial in the
944 * case where bisection is used to ensure that the
945 * eigenvalue is refined up to the required precision.
946 * The correct value is restored afterwards.
947  savegp = wgap(windex)
948  wgap(windex) = gap
949 * We want to use the Rayleigh Quotient Correction
950 * as often as possible since it converges quadratically
951 * when we are close enough to the desired eigenvalue.
952 * However, the Rayleigh Quotient can have the wrong sign
953 * and lead us away from the desired eigenvalue. In this
954 * case, the best we can do is to use bisection.
955  usedbs = .false.
956  usedrq = .false.
957 * Bisection is initially turned off unless it is forced
958  needbs = .NOT.tryrqc
959 * Reset ITWIST
960  itwist = 0
961  120 CONTINUE
962 * Check if bisection should be used to refine eigenvalue
963  IF(needbs) THEN
964 * Take the bisection as new iterate
965  usedbs = .true.
966 * Temporary copy of twist index needed
967  itmp1 = itwist
968  offset = indexw( wbegin ) - 1
969  CALL slarrb2( in, d(ibegin),
970  $ work(indlld+ibegin-1),indeig,indeig,
971  $ zero, two*eps, offset,
972  $ work(wbegin),wgap(wbegin),
973  $ werr(wbegin),work( indwrk ),
974  $ iwork( iindwk ),
975  $ pivmin, lgpvmn, lgspdm, itmp1, iinfo )
976  IF( iinfo.NE.0 ) THEN
977  info = -3
978  RETURN
979  ENDIF
980  lambda = work( windex )
981 * Reset twist index from inaccurate LAMBDA to
982 * force computation of true MINGMA
983  itwist = 0
984  ENDIF
985 * Given LAMBDA, compute the eigenvector.
986  CALL slar1va( in, 1, in, lambda, d(ibegin),
987  $ l( ibegin ), work(indld+ibegin-1),
988  $ work(indlld+ibegin-1),
989  $ pivmin, gaptol, z( ibegin, zindex),
990  $ .NOT.usedbs, negcnt, ztz, mingma,
991  $ itwist, isuppz( 2*windex-1 ),
992  $ nrminv, resid, rqcorr, work( indwrk ) )
993  IF(iter .EQ. 0) THEN
994  bstres = resid
995  bstw = lambda
996  ELSEIF(resid.LT.bstres) THEN
997  bstres = resid
998  bstw = lambda
999  ENDIF
1000  isupmn = min(isupmn,isuppz( 2*windex-1 ))
1001  isupmx = max(isupmx,isuppz( 2*windex ))
1002  iter = iter + 1
1003 *
1004 * Convergence test for Rayleigh-Quotient iteration
1005 * (omitted when Bisection has been used)
1006 *
1007  IF( resid.GT.tol*gap .AND. abs( rqcorr ).GT.
1008  $ rqtol*abs( lambda ) .AND. .NOT. usedbs)
1009  $ THEN
1010 * We need to check that the RQCORR update doesn't
1011 * move the eigenvalue away from the desired one and
1012 * towards a neighbor. -> protection with bisection
1013  IF(indeig.LE.negcnt) THEN
1014 * The wanted eigenvalue lies to the left
1015  sgndef = -one
1016  ELSE
1017 * The wanted eigenvalue lies to the right
1018  sgndef = one
1019  ENDIF
1020 * We only use the RQCORR if it improves the
1021 * the iterate reasonably.
1022  IF( ( rqcorr*sgndef.GE.zero )
1023  $ .AND.( lambda + rqcorr.LE. right)
1024  $ .AND.( lambda + rqcorr.GE. left)
1025  $ ) THEN
1026  usedrq = .true.
1027 * Store new midpoint of bisection interval in WORK
1028  IF(sgndef.EQ.one) THEN
1029 * The current LAMBDA is on the left of the true
1030 * eigenvalue
1031  left = lambda
1032  ELSE
1033 * The current LAMBDA is on the right of the true
1034 * eigenvalue
1035  right = lambda
1036  ENDIF
1037  work( windex ) =
1038  $ half * (right + left)
1039 * Take RQCORR since it has the correct sign and
1040 * improves the iterate reasonably
1041  lambda = lambda + rqcorr
1042 * Update width of error interval
1043  werr( windex ) =
1044  $ half * (right-left)
1045  ELSE
1046  needbs = .true.
1047  ENDIF
1048  IF(right-left.LT.rqtol*abs(lambda)) THEN
1049 * The eigenvalue is computed to bisection accuracy
1050 * compute eigenvector and stop
1051  usedbs = .true.
1052  GOTO 120
1053  ELSEIF( iter.LT.maxitr ) THEN
1054  GOTO 120
1055  ELSEIF( iter.EQ.maxitr ) THEN
1056  needbs = .true.
1057  GOTO 120
1058  ELSE
1059  info = 5
1060  RETURN
1061  END IF
1062  ELSE
1063  stp2ii = .false.
1064  IF(usedrq .AND. usedbs .AND.
1065  $ bstres.LE.resid) THEN
1066  lambda = bstw
1067  stp2ii = .true.
1068  ENDIF
1069  IF (stp2ii) THEN
1070  CALL slar1va( in, 1, in, lambda,
1071  $ d( ibegin ), l( ibegin ),
1072  $ work(indld+ibegin-1),
1073  $ work(indlld+ibegin-1),
1074  $ pivmin, gaptol,
1075  $ z( ibegin, zindex ),
1076  $ .NOT.usedbs, negcnt, ztz, mingma,
1077  $ itwist,
1078  $ isuppz( 2*windex-1 ),
1079  $ nrminv, resid, rqcorr, work( indwrk ) )
1080  ENDIF
1081  work( windex ) = lambda
1082  END IF
1083 *
1084 * Compute FP-vector support w.r.t. whole matrix
1085 *
1086  isuppz( 2*windex-1 ) = isuppz( 2*windex-1 )+oldien
1087  isuppz( 2*windex ) = isuppz( 2*windex )+oldien
1088  zfrom = isuppz( 2*windex-1 )
1089  zto = isuppz( 2*windex )
1090  isupmn = isupmn + oldien
1091  isupmx = isupmx + oldien
1092 * Ensure vector is ok if support in the RQI has changed
1093  IF(isupmn.LT.zfrom) THEN
1094  DO 122 ii = isupmn,zfrom-1
1095  z( ii, zindex ) = zero
1096  122 CONTINUE
1097  ENDIF
1098  IF(isupmx.GT.zto) THEN
1099  DO 123 ii = zto+1,isupmx
1100  z( ii, zindex ) = zero
1101  123 CONTINUE
1102  ENDIF
1103  CALL sscal( zto-zfrom+1, nrminv,
1104  $ z( zfrom, zindex ), 1 )
1105  125 CONTINUE
1106 * Update W
1107  w( windex ) = lambda+sigma
1108 * Recompute the gaps on the left and right
1109 * But only allow them to become larger and not
1110 * smaller (which can only happen through "bad"
1111 * cancellation and doesn't reflect the theory
1112 * where the initial gaps are underestimated due
1113 * to WERR being too crude.)
1114  IF(.NOT.eskip) THEN
1115  IF( k.GT.1) THEN
1116  wgap( windmn ) = max( wgap(windmn),
1117  $ w(windex)-werr(windex)
1118  $ - w(windmn)-werr(windmn) )
1119  ENDIF
1120  IF( windex.LT.wend ) THEN
1121  wgap( windex ) = max( savegp,
1122  $ w( windpl )-werr( windpl )
1123  $ - w( windex )-werr( windex) )
1124  ENDIF
1125  ENDIF
1126  ENDIF
1127 * here ends the code for the current child
1128 *
1129  139 CONTINUE
1130 * Proceed to any remaining child nodes
1131  newfst = j + 1
1132  140 CONTINUE
1133  150 CONTINUE
1134 * Store number of clusters
1135  iwork( iincls + jblk ) = nclus
1136 *
1137  END IF
1138  ibegin = iend + 1
1139  wbegin = wend + 1
1140  170 CONTINUE
1141 *
1142 * Check if everything is done: no clusters left for
1143 * this processor in any block
1144 *
1145  finish = .true.
1146  DO 180 jblk = 1, iblock( m )
1147  finish = finish .AND. (iwork(iincls + jblk).EQ.0)
1148  180 CONTINUE
1149 
1150  IF(.NOT.finish) THEN
1151  ndepth = ndepth + 1
1152  IF((needil.GE.dol).AND.(neediu.LE.dou)) THEN
1153 * Once this processor's part of the
1154 * representation tree consists exclusively of eigenvalues
1155 * between DOL and DOU, it can work independently from all
1156 * others.
1157  GOTO 40
1158  ENDIF
1159  ENDIF
1160 *
1161 
1162  RETURN
1163 *
1164 * End of SLARRV2
1165 *
1166  END
max
#define max(A, B)
Definition: pcgemr.c:180
slarrv2
subroutine slarrv2(N, VL, VU, D, L, PIVMIN, ISPLIT, M, DOL, DOU, NEEDIL, NEEDIU, MINRGP, RTOL1, RTOL2, W, WERR, WGAP, IBLOCK, INDEXW, GERS, SDIAM, Z, LDZ, ISUPPZ, WORK, IWORK, VSTART, FINISH, MAXCLS, NDEPTH, PARITY, ZOFFSET, INFO)
Definition: slarrv2.f:8
slarrf2
subroutine slarrf2(N, D, L, LD, CLSTRT, CLEND, CLMID1, CLMID2, W, WGAP, WERR, TRYMID, SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA, DPLUS, LPLUS, WORK, INFO)
Definition: slarrf2.f:5
min
#define min(A, B)
Definition: pcgemr.c:181