SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pssyevx.f
Go to the documentation of this file.
1 SUBROUTINE pssyevx( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2 $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3 $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK, IFAIL,
4 $ ICLUSTR, GAP, INFO )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* May 25, 2001
10*
11* .. Scalar Arguments ..
12 CHARACTER JOBZ, RANGE, UPLO
13 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
14 $ N, NZ
15 REAL ABSTOL, ORFAC, VL, VU
16* ..
17* .. Array Arguments ..
18 INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19 $ IFAIL( * ), IWORK( * )
20 REAL A( * ), GAP( * ), W( * ), WORK( * ), Z( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PSSYEVX computes selected eigenvalues and, optionally, eigenvectors
27* of a real symmetric matrix A by calling the recommended sequence
28* of ScaLAPACK routines. Eigenvalues/vectors can be selected by
29* specifying a range of values or a range of indices for the desired
30* eigenvalues.
31*
32* Notes
33* =====
34*
35* Each global data object is described by an associated description
36* vector. This vector stores the information required to establish
37* the mapping between an object element and its corresponding process
38* and memory location.
39*
40* Let A be a generic term for any 2D block cyclicly distributed array.
41* Such a global array has an associated description vector DESCA.
42* In the following comments, the character _ should be read as
43* "of the global array".
44*
45* NOTATION STORED IN EXPLANATION
46* --------------- -------------- --------------------------------------
47* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
48* DTYPE_A = 1.
49* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
50* the BLACS process grid A is distribu-
51* ted over. The context itself is glo-
52* bal, but the handle (the integer
53* value) may vary.
54* M_A (global) DESCA( M_ ) The number of rows in the global
55* array A.
56* N_A (global) DESCA( N_ ) The number of columns in the global
57* array A.
58* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
59* the rows of the array.
60* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
61* the columns of the array.
62* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
63* row of the array A is distributed.
64* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
65* first column of the array A is
66* distributed.
67* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
68* array. LLD_A >= MAX(1,LOCr(M_A)).
69*
70* Let K be the number of rows or columns of a distributed matrix,
71* and assume that its process grid has dimension p x q.
72* LOCr( K ) denotes the number of elements of K that a process
73* would receive if K were distributed over the p processes of its
74* process column.
75* Similarly, LOCc( K ) denotes the number of elements of K that a
76* process would receive if K were distributed over the q processes of
77* its process row.
78* The values of LOCr() and LOCc() may be determined via a call to the
79* ScaLAPACK tool function, NUMROC:
80* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
81* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
82* An upper bound for these quantities may be computed by:
83* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
84* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
85*
86* PSSYEVX assumes IEEE 754 standard compliant arithmetic. To port
87* to a system which does not have IEEE 754 arithmetic, modify
88* the appropriate SLmake.inc file to include the compiler switch
89* -DNO_IEEE. This switch only affects the compilation of pslaiect.c.
90*
91* Arguments
92* =========
93*
94* NP = the number of rows local to a given process.
95* NQ = the number of columns local to a given process.
96*
97* JOBZ (global input) CHARACTER*1
98* Specifies whether or not to compute the eigenvectors:
99* = 'N': Compute eigenvalues only.
100* = 'V': Compute eigenvalues and eigenvectors.
101*
102* RANGE (global input) CHARACTER*1
103* = 'A': all eigenvalues will be found.
104* = 'V': all eigenvalues in the interval [VL,VU] will be found.
105* = 'I': the IL-th through IU-th eigenvalues will be found.
106*
107* UPLO (global input) CHARACTER*1
108* Specifies whether the upper or lower triangular part of the
109* symmetric matrix A is stored:
110* = 'U': Upper triangular
111* = 'L': Lower triangular
112*
113* N (global input) INTEGER
114* The number of rows and columns of the matrix A. N >= 0.
115*
116* A (local input/workspace) block cyclic REAL array,
117* global dimension (N, N),
118* local dimension ( LLD_A, LOCc(JA+N-1) )
119*
120* On entry, the symmetric matrix A. If UPLO = 'U', only the
121* upper triangular part of A is used to define the elements of
122* the symmetric matrix. If UPLO = 'L', only the lower
123* triangular part of A is used to define the elements of the
124* symmetric matrix.
125*
126* On exit, the lower triangle (if UPLO='L') or the upper
127* triangle (if UPLO='U') of A, including the diagonal, is
128* destroyed.
129*
130* IA (global input) INTEGER
131* A's global row index, which points to the beginning of the
132* submatrix which is to be operated on.
133*
134* JA (global input) INTEGER
135* A's global column index, which points to the beginning of
136* the submatrix which is to be operated on.
137*
138* DESCA (global and local input) INTEGER array of dimension DLEN_.
139* The array descriptor for the distributed matrix A.
140* If DESCA( CTXT_ ) is incorrect, PSSYEVX cannot guarantee
141* correct error reporting.
142*
143* VL (global input) REAL
144* If RANGE='V', the lower bound of the interval to be searched
145* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
146*
147* VU (global input) REAL
148* If RANGE='V', the upper bound of the interval to be searched
149* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
150*
151* IL (global input) INTEGER
152* If RANGE='I', the index (from smallest to largest) of the
153* smallest eigenvalue to be returned. IL >= 1.
154* Not referenced if RANGE = 'A' or 'V'.
155*
156* IU (global input) INTEGER
157* If RANGE='I', the index (from smallest to largest) of the
158* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
159* Not referenced if RANGE = 'A' or 'V'.
160*
161* ABSTOL (global input) REAL
162* If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
163* the most orthogonal eigenvectors.
164*
165* The absolute error tolerance for the eigenvalues.
166* An approximate eigenvalue is accepted as converged
167* when it is determined to lie in an interval [a,b]
168* of width less than or equal to
169*
170* ABSTOL + EPS * max( |a|,|b| ) ,
171*
172* where EPS is the machine precision. If ABSTOL is less than
173* or equal to zero, then EPS*norm(T) will be used in its place,
174* where norm(T) is the 1-norm of the tridiagonal matrix
175* obtained by reducing A to tridiagonal form.
176*
177* Eigenvalues will be computed most accurately when ABSTOL is
178* set to twice the underflow threshold 2*PSLAMCH('S') not zero.
179* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
180* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
181* eigenvectors did not converge, try setting ABSTOL to
182* 2*PSLAMCH('S').
183*
184* See "Computing Small Singular Values of Bidiagonal Matrices
185* with Guaranteed High Relative Accuracy," by Demmel and
186* Kahan, LAPACK Working Note #3.
187*
188* See "On the correctness of Parallel Bisection in Floating
189* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
190*
191* M (global output) INTEGER
192* Total number of eigenvalues found. 0 <= M <= N.
193*
194* NZ (global output) INTEGER
195* Total number of eigenvectors computed. 0 <= NZ <= M.
196* The number of columns of Z that are filled.
197* If JOBZ .NE. 'V', NZ is not referenced.
198* If JOBZ .EQ. 'V', NZ = M unless the user supplies
199* insufficient space and PSSYEVX is not able to detect this
200* before beginning computation. To get all the eigenvectors
201* requested, the user must supply both sufficient
202* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
203* and sufficient workspace to compute them. (See LWORK below.)
204* PSSYEVX is always able to detect insufficient space without
205* computation unless RANGE .EQ. 'V'.
206*
207* W (global output) REAL array, dimension (N)
208* On normal exit, the first M entries contain the selected
209* eigenvalues in ascending order.
210*
211* ORFAC (global input) REAL
212* Specifies which eigenvectors should be reorthogonalized.
213* Eigenvectors that correspond to eigenvalues which are within
214* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
215* However, if the workspace is insufficient (see LWORK),
216* tol may be decreased until all eigenvectors to be
217* reorthogonalized can be stored in one process.
218* No reorthogonalization will be done if ORFAC equals zero.
219* A default value of 10^-3 is used if ORFAC is negative.
220* ORFAC should be identical on all processes.
221*
222* Z (local output) REAL array,
223* global dimension (N, N),
224* local dimension ( LLD_Z, LOCc(JZ+N-1) )
225* If JOBZ = 'V', then on normal exit the first M columns of Z
226* contain the orthonormal eigenvectors of the matrix
227* corresponding to the selected eigenvalues. If an eigenvector
228* fails to converge, then that column of Z contains the latest
229* approximation to the eigenvector, and the index of the
230* eigenvector is returned in IFAIL.
231* If JOBZ = 'N', then Z is not referenced.
232*
233* IZ (global input) INTEGER
234* Z's global row index, which points to the beginning of the
235* submatrix which is to be operated on.
236*
237* JZ (global input) INTEGER
238* Z's global column index, which points to the beginning of
239* the submatrix which is to be operated on.
240*
241* DESCZ (global and local input) INTEGER array of dimension DLEN_.
242* The array descriptor for the distributed matrix Z.
243* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
244*
245* WORK (local workspace/output) REAL array,
246* dimension max(3,LWORK)
247* On return, WORK(1) contains the optimal amount of
248* workspace required for efficient execution.
249* if JOBZ='N' WORK(1) = optimal amount of workspace
250* required to compute eigenvalues efficiently
251* if JOBZ='V' WORK(1) = optimal amount of workspace
252* required to compute eigenvalues and eigenvectors
253* efficiently with no guarantee on orthogonality.
254* If RANGE='V', it is assumed that all eigenvectors
255* may be required.
256*
257* LWORK (local input) INTEGER
258* Size of WORK
259* See below for definitions of variables used to define LWORK.
260* If no eigenvectors are requested (JOBZ = 'N') then
261* LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
262* If eigenvectors are requested (JOBZ = 'V' ) then
263* the amount of workspace required to guarantee that all
264* eigenvectors are computed is:
265* LWORK >= 5*N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
266* ICEIL( NEIG, NPROW*NPCOL)*NN
267*
268* The computed eigenvectors may not be orthogonal if the
269* minimal workspace is supplied and ORFAC is too small.
270* If you want to guarantee orthogonality (at the cost
271* of potentially poor performance) you should add
272* the following to LWORK:
273* (CLUSTERSIZE-1)*N
274* where CLUSTERSIZE is the number of eigenvalues in the
275* largest cluster, where a cluster is defined as a set of
276* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
277* W(J+1) <= W(J) + ORFAC*2*norm(A) }
278* Variable definitions:
279* NEIG = number of eigenvectors requested
280* NB = DESCA( MB_ ) = DESCA( NB_ ) =
281* DESCZ( MB_ ) = DESCZ( NB_ )
282* NN = MAX( N, NB, 2 )
283* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
284* DESCZ( CSRC_ ) = 0
285* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
286* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
287* ICEIL( X, Y ) is a ScaLAPACK function returning
288* ceiling(X/Y)
289*
290* When LWORK is too small:
291* If LWORK is too small to guarantee orthogonality,
292* PSSYEVX attempts to maintain orthogonality in
293* the clusters with the smallest
294* spacing between the eigenvalues.
295* If LWORK is too small to compute all the eigenvectors
296* requested, no computation is performed and INFO=-23
297* is returned. Note that when RANGE='V', PSSYEVX does
298* not know how many eigenvectors are requested until
299* the eigenvalues are computed. Therefore, when RANGE='V'
300* and as long as LWORK is large enough to allow PSSYEVX to
301* compute the eigenvalues, PSSYEVX will compute the
302* eigenvalues and as many eigenvectors as it can.
303*
304* Relationship between workspace, orthogonality & performance:
305* Greater performance can be achieved if adequate workspace
306* is provided. On the other hand, in some situations,
307* performance can decrease as the workspace provided
308* increases above the workspace amount shown below:
309*
310* For optimal performance, greater workspace may be
311* needed, i.e.
312* LWORK >= MAX( LWORK, 5*N + NSYTRD_LWOPT )
313* Where:
314* LWORK, as defined previously, depends upon the number
315* of eigenvectors requested, and
316* NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
317* ( NPS + 3 ) * NPS
318*
319* ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L',
320* 0, 0, 0, 0)
321* SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
322* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
323*
324* NUMROC is a ScaLAPACK tool functions;
325* PJLAENV is a ScaLAPACK envionmental inquiry function
326* MYROW, MYCOL, NPROW and NPCOL can be determined by
327* calling the subroutine BLACS_GRIDINFO.
328*
329* For large N, no extra workspace is needed, however the
330* biggest boost in performance comes for small N, so it
331* is wise to provide the extra workspace (typically less
332* than a Megabyte per process).
333*
334* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
335* enough space to compute all the eigenvectors
336* orthogonally will cause serious degradation in
337* performance. In the limit (i.e. CLUSTERSIZE = N-1)
338* PSSTEIN will perform no better than SSTEIN on 1
339* processor.
340* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
341* all eigenvectors will increase the total execution time
342* by a factor of 2 or more.
343* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
344* grow as the square of the cluster size, all other factors
345* remaining equal and assuming enough workspace. Less
346* workspace means less reorthogonalization but faster
347* execution.
348*
349* If LWORK = -1, then LWORK is global input and a workspace
350* query is assumed; the routine only calculates the size
351* required for optimal performance for all work arrays. Each of
352* these values is returned in the first entry of the
353* corresponding work arrays, and no error message is issued by
354* PXERBLA.
355*
356* IWORK (local workspace) INTEGER array
357* On return, IWORK(1) contains the amount of integer workspace
358* required.
359*
360* LIWORK (local input) INTEGER
361* size of IWORK
362* LIWORK >= 6 * NNP
363* Where:
364* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
365* If LIWORK = -1, then LIWORK is global input and a workspace
366* query is assumed; the routine only calculates the minimum
367* and optimal size for all work arrays. Each of these
368* values is returned in the first entry of the corresponding
369* work array, and no error message is issued by PXERBLA.
370*
371* IFAIL (global output) INTEGER array, dimension (N)
372* If JOBZ = 'V', then on normal exit, the first M elements of
373* IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
374* IFAIL contains the
375* indices of the eigenvectors that failed to converge.
376* If JOBZ = 'N', then IFAIL is not referenced.
377*
378* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
379* This array contains indices of eigenvectors corresponding to
380* a cluster of eigenvalues that could not be reorthogonalized
381* due to insufficient workspace (see LWORK, ORFAC and INFO).
382* Eigenvectors corresponding to clusters of eigenvalues indexed
383* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
384* reorthogonalized due to lack of workspace. Hence the
385* eigenvectors corresponding to these clusters may not be
386* orthogonal. ICLUSTR() is a zero terminated array.
387* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
388* K is the number of clusters
389* ICLUSTR is not referenced if JOBZ = 'N'
390*
391* GAP (global output) REAL array,
392* dimension (NPROW*NPCOL)
393* This array contains the gap between eigenvalues whose
394* eigenvectors could not be reorthogonalized. The output
395* values in this array correspond to the clusters indicated
396* by the array ICLUSTR. As a result, the dot product between
397* eigenvectors correspoding to the I^th cluster may be as high
398* as ( C * n ) / GAP(I) where C is a small constant.
399*
400* INFO (global output) INTEGER
401* = 0: successful exit
402* < 0: If the i-th argument is an array and the j-entry had
403* an illegal value, then INFO = -(i*100+j), if the i-th
404* argument is a scalar and had an illegal value, then
405* INFO = -i.
406* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
407* failed to converge. Their indices are stored
408* in IFAIL. Ensure ABSTOL=2.0*PSLAMCH( 'U' )
409* Send e-mail to scalapack@cs.utk.edu
410* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
411* to one or more clusters of eigenvalues could not be
412* reorthogonalized because of insufficient workspace.
413* The indices of the clusters are stored in the array
414* ICLUSTR.
415* if (MOD(INFO/4,2).NE.0), then space limit prevented
416* PSSYEVX from computing all of the eigenvectors
417* between VL and VU. The number of eigenvectors
418* computed is returned in NZ.
419* if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to compute
420* eigenvalues. Ensure ABSTOL=2.0*PSLAMCH( 'U' )
421* Send e-mail to scalapack@cs.utk.edu
422*
423* Alignment requirements
424* ======================
425*
426* The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
427* must verify some alignment properties, namely the following
428* expressions should be true:
429*
430* ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
431* IAROW.EQ.IZROW )
432* where
433* IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
434*
435* =====================================================================
436*
437* Differences between PSSYEVX and SSYEVX
438* ======================================
439*
440* A, LDA -> A, IA, JA, DESCA
441* Z, LDZ -> Z, IZ, JZ, DESCZ
442* WORKSPACE needs are larger for PSSYEVX.
443* LIWORK parameter added
444*
445* ORFAC, ICLUSTER() and GAP() parameters added
446* meaning of INFO is changed
447*
448* Functional differences:
449* PSSYEVX does not promise orthogonality for eigenvectors associated
450* with tighly clustered eigenvalues.
451* PSSYEVX does not reorthogonalize eigenvectors
452* that are on different processes. The extent of reorthogonalization
453* is controlled by the input parameter LWORK.
454*
455* Version 1.4 limitations:
456* DESCA(MB_) = DESCA(NB_)
457* DESCA(M_) = DESCZ(M_)
458* DESCA(N_) = DESCZ(N_)
459* DESCA(MB_) = DESCZ(MB_)
460* DESCA(NB_) = DESCZ(NB_)
461* DESCA(RSRC_) = DESCZ(RSRC_)
462*
463* .. Parameters ..
464 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
465 $ MB_, NB_, RSRC_, CSRC_, LLD_
466 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
467 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
468 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
469 REAL ZERO, ONE, TEN, FIVE
470 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0, ten = 10.0e+0,
471 $ five = 5.0e+0 )
472 INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
473 parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
474 $ ierrebz = 8 )
475* ..
476* .. Local Scalars ..
477 LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
478 $ VALEIG, WANTZ
479 CHARACTER ORDER
480 INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
481 $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
482 $ indtau, indwork, iroffa, iroffz, iscale,
483 $ isizestebz, isizestein, izrow, lallwork,
484 $ liwmin, llwork, lwmin, lwopt, maxeigs, mb_a,
485 $ mq0, mycol, myrow, nb, nb_a, neig, nn, nnp,
486 $ np0, npcol, nprocs, nprow, nps, nsplit,
487 $ nsytrd_lwopt, nzz, offset, rsrc_a, rsrc_z,
488 $ sizeormtr, sizestein, sizesyevx, sqnpc
489 REAL ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
490 $ SIGMA, SMLNUM, VLL, VUU
491* ..
492* .. Local Arrays ..
493 INTEGER IDUM1( 4 ), IDUM2( 4 )
494* ..
495* .. External Functions ..
496 LOGICAL LSAME
497 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
498 REAL PSLAMCH, PSLANSY
499 EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
500 $ pslamch, pslansy
501* ..
502* .. External Subroutines ..
503 EXTERNAL blacs_gridinfo, chk1mat, igamn2d, pchk1mat,
505 $ psstebz, psstein, pssyntrd, pxerbla, sgebr2d,
506 $ sgebs2d, slasrt, sscal
507* ..
508* .. Intrinsic Functions ..
509 INTRINSIC abs, dble, ichar, int, max, min, mod, real,
510 $ sqrt
511* ..
512* .. Executable Statements ..
513* This is just to keep ftnchek and toolpack/1 happy
514 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
515 $ rsrc_.LT.0 )RETURN
516*
517 quickreturn = ( n.EQ.0 )
518*
519* Test the input arguments.
520*
521 ictxt = desca( ctxt_ )
522 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
523 info = 0
524*
525 wantz = lsame( jobz, 'V' )
526 IF( nprow.EQ.-1 ) THEN
527 info = -( 800+ctxt_ )
528 ELSE IF( wantz ) THEN
529 IF( ictxt.NE.descz( ctxt_ ) ) THEN
530 info = -( 2100+ctxt_ )
531 END IF
532 END IF
533 IF( info.EQ.0 ) THEN
534 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
535 IF( wantz )
536 $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
537*
538 IF( info.EQ.0 ) THEN
539*
540* Get machine constants.
541*
542 safmin = pslamch( ictxt, 'Safe minimum' )
543 eps = pslamch( ictxt, 'Precision' )
544 smlnum = safmin / eps
545 bignum = one / smlnum
546 rmin = sqrt( smlnum )
547 rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
548*
549 nprocs = nprow*npcol
550 lower = lsame( uplo, 'L' )
551 alleig = lsame( range, 'A' )
552 valeig = lsame( range, 'V' )
553 indeig = lsame( range, 'I' )
554*
555* Set up pointers into the WORK array
556*
557 indtau = 1
558 inde = indtau + n
559 indd = inde + n
560 indd2 = indd + n
561 inde2 = indd2 + n
562 indwork = inde2 + n
563 llwork = lwork - indwork + 1
564*
565* Set up pointers into the IWORK array
566*
567 isizestein = 3*n + nprocs + 1
568 isizestebz = max( 4*n, 14, nprocs )
569 indibl = ( max( isizestein, isizestebz ) ) + 1
570 indisp = indibl + n
571*
572* Compute the total amount of space needed
573*
574 lquery = .false.
575 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
576 $ lquery = .true.
577*
578 nnp = max( n, nprocs+1, 4 )
579 liwmin = 6*nnp
580*
581 nprocs = nprow*npcol
582 nb_a = desca( nb_ )
583 mb_a = desca( mb_ )
584 nb = nb_a
585 nn = max( n, nb, 2 )
586*
587 rsrc_a = desca( rsrc_ )
588 csrc_a = desca( csrc_ )
589 iroffa = mod( ia-1, mb_a )
590 icoffa = mod( ja-1, nb_a )
591 iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
592 np0 = numroc( n+iroffa, nb, 0, 0, nprow )
593 mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
594 IF( wantz ) THEN
595 rsrc_z = descz( rsrc_ )
596 iroffz = mod( iz-1, mb_a )
597 izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
598 ELSE
599 iroffz = 0
600 izrow = 0
601 END IF
602*
603 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
604 $ THEN
605 lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
606 IF( wantz ) THEN
607 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
608 lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
609 ELSE
610 lwopt = lwmin
611 END IF
612 neig = 0
613 ELSE
614 IF( alleig .OR. valeig ) THEN
615 neig = n
616 ELSE IF( indeig ) THEN
617 neig = iu - il + 1
618 END IF
619 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
620 lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
621 $ iceil( neig, nprow*npcol )*nn
622 lwopt = lwmin
623*
624 END IF
625*
626* Compute how much workspace is needed to use the
627* new TRD code
628*
629 anb = pjlaenv( ictxt, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
630 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
631 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
632 nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
633 lwopt = max( lwopt, 5*n+nsytrd_lwopt )
634*
635 END IF
636 IF( info.EQ.0 ) THEN
637 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
638 work( 1 ) = abstol
639 IF( valeig ) THEN
640 work( 2 ) = vl
641 work( 3 ) = vu
642 ELSE
643 work( 2 ) = zero
644 work( 3 ) = zero
645 END IF
646 CALL sgebs2d( ictxt, 'ALL', ' ', 3, 1, work, 3 )
647 ELSE
648 CALL sgebr2d( ictxt, 'ALL', ' ', 3, 1, work, 3, 0, 0 )
649 END IF
650 IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
651 info = -1
652 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
653 info = -2
654 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
655 info = -3
656 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
657 info = -10
658 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
659 $ THEN
660 info = -11
661 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
662 $ THEN
663 info = -12
664 ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
665 info = -23
666 ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
667 info = -25
668 ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
669 $ abs( vl ) ) ) THEN
670 info = -9
671 ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
672 $ abs( vu ) ) ) THEN
673 info = -10
674 ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
675 $ THEN
676 info = -13
677 ELSE IF( iroffa.NE.0 ) THEN
678 info = -6
679 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
680 info = -( 800+nb_ )
681 END IF
682 IF( wantz ) THEN
683 IF( iroffa.NE.iroffz ) THEN
684 info = -19
685 ELSE IF( iarow.NE.izrow ) THEN
686 info = -19
687 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
688 info = -( 2100+m_ )
689 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
690 info = -( 2100+n_ )
691 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
692 info = -( 2100+mb_ )
693 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
694 info = -( 2100+nb_ )
695 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
696 info = -( 2100+rsrc_ )
697 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
698 info = -( 2100+csrc_ )
699 ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
700 info = -( 2100+ctxt_ )
701 END IF
702 END IF
703 END IF
704 IF( wantz ) THEN
705 idum1( 1 ) = ichar( 'V' )
706 ELSE
707 idum1( 1 ) = ichar( 'N' )
708 END IF
709 idum2( 1 ) = 1
710 IF( lower ) THEN
711 idum1( 2 ) = ichar( 'L' )
712 ELSE
713 idum1( 2 ) = ichar( 'U' )
714 END IF
715 idum2( 2 ) = 2
716 IF( alleig ) THEN
717 idum1( 3 ) = ichar( 'A' )
718 ELSE IF( indeig ) THEN
719 idum1( 3 ) = ichar( 'I' )
720 ELSE
721 idum1( 3 ) = ichar( 'V' )
722 END IF
723 idum2( 3 ) = 3
724 IF( lquery ) THEN
725 idum1( 4 ) = -1
726 ELSE
727 idum1( 4 ) = 1
728 END IF
729 idum2( 4 ) = 4
730 IF( wantz ) THEN
731 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
732 $ jz, descz, 21, 4, idum1, idum2, info )
733 ELSE
734 CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
735 $ idum2, info )
736 END IF
737 work( 1 ) = real( lwopt )
738 iwork( 1 ) = liwmin
739 END IF
740*
741 IF( info.NE.0 ) THEN
742 CALL pxerbla( ictxt, 'PSSYEVX', -info )
743 RETURN
744 ELSE IF( lquery ) THEN
745 RETURN
746 END IF
747*
748* Quick return if possible
749*
750 IF( quickreturn ) THEN
751 IF( wantz ) THEN
752 nz = 0
753 iclustr( 1 ) = 0
754 END IF
755 m = 0
756 work( 1 ) = real( lwopt )
757 iwork( 1 ) = liwmin
758 RETURN
759 END IF
760*
761* Scale matrix to allowable range, if necessary.
762*
763 abstll = abstol
764 iscale = 0
765 IF( valeig ) THEN
766 vll = vl
767 vuu = vu
768 ELSE
769 vll = zero
770 vuu = zero
771 END IF
772*
773 anrm = pslansy( 'M', uplo, n, a, ia, ja, desca, work( indwork ) )
774*
775 IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
776 iscale = 1
777 sigma = rmin / anrm
778 anrm = anrm*sigma
779 ELSE IF( anrm.GT.rmax ) THEN
780 iscale = 1
781 sigma = rmax / anrm
782 anrm = anrm*sigma
783 END IF
784*
785 IF( iscale.EQ.1 ) THEN
786 CALL pslascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
787 IF( abstol.GT.0 )
788 $ abstll = abstol*sigma
789 IF( valeig ) THEN
790 vll = vl*sigma
791 vuu = vu*sigma
792 IF( vuu.EQ.vll ) THEN
793 vuu = vuu + 2*max( abs( vuu )*eps, safmin )
794 END IF
795 END IF
796 END IF
797*
798* Call PSSYNTRD to reduce symmetric matrix to tridiagonal form.
799*
800 lallwork = llwork
801*
802 CALL pssyntrd( uplo, n, a, ia, ja, desca, work( indd ),
803 $ work( inde ), work( indtau ), work( indwork ),
804 $ llwork, iinfo )
805*
806*
807* Copy the values of D, E to all processes
808*
809* Here PxLARED1D is used to redistribute the tridiagonal matrix.
810* PxLARED1D, however, doesn't yet work with arbritary matrix
811* distributions so we have PxELGET as a backup.
812*
813 offset = 0
814 IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
815 $ THEN
816 CALL pslared1d( n, ia, ja, desca, work( indd ), work( indd2 ),
817 $ work( indwork ), llwork )
818*
819 CALL pslared1d( n, ia, ja, desca, work( inde ), work( inde2 ),
820 $ work( indwork ), llwork )
821 IF( .NOT.lower )
822 $ offset = 1
823 ELSE
824 DO 10 i = 1, n
825 CALL pselget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
826 $ i+ja-1, desca )
827 10 CONTINUE
828 IF( lsame( uplo, 'U' ) ) THEN
829 DO 20 i = 1, n - 1
830 CALL pselget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
831 $ i+ja, desca )
832 20 CONTINUE
833 ELSE
834 DO 30 i = 1, n - 1
835 CALL pselget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
836 $ i+ja-1, desca )
837 30 CONTINUE
838 END IF
839 END IF
840*
841* Call PSSTEBZ and, if eigenvectors are desired, PSSTEIN.
842*
843 IF( wantz ) THEN
844 order = 'B'
845 ELSE
846 order = 'E'
847 END IF
848*
849 CALL psstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
850 $ work( indd2 ), work( inde2+offset ), m, nsplit, w,
851 $ iwork( indibl ), iwork( indisp ), work( indwork ),
852 $ llwork, iwork( 1 ), isizestebz, iinfo )
853*
854*
855* IF PSSTEBZ fails, the error propogates to INFO, but
856* we do not propogate the eigenvalue(s) which failed because:
857* 1) This should never happen if the user specifies
858* ABSTOL = 2 * PSLAMCH( 'U' )
859* 2) PSSTEIN will confirm/deny whether the eigenvalues are
860* close enough.
861*
862 IF( iinfo.NE.0 ) THEN
863 info = info + ierrebz
864 DO 40 i = 1, m
865 iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
866 40 CONTINUE
867 END IF
868 IF( wantz ) THEN
869*
870 IF( valeig ) THEN
871*
872* Compute the maximum number of eigenvalues that we can
873* compute in the
874* workspace that we have, and that we can store in Z.
875*
876* Loop through the possibilities looking for the largest
877* NZ that we can feed to PSSTEIN and PSORMTR
878*
879* Since all processes must end up with the same value
880* of NZ, we first compute the minimum of LALLWORK
881*
882 CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
883 $ -1, -1 )
884*
885 maxeigs = descz( n_ )
886*
887 DO 50 nz = min( maxeigs, m ), 0, -1
888 mq0 = numroc( nz, nb, 0, 0, npcol )
889 sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
890 sizeormtr = max( ( nb*( nb-1 ) ) / 2, ( mq0+np0 )*nb ) +
891 $ nb*nb
892*
893 sizesyevx = max( sizestein, sizeormtr )
894 IF( sizesyevx.LE.lallwork )
895 $ GO TO 60
896 50 CONTINUE
897 60 CONTINUE
898 ELSE
899 nz = m
900 END IF
901 nz = max( nz, 0 )
902 IF( nz.NE.m ) THEN
903 info = info + ierrspc
904*
905 DO 70 i = 1, m
906 ifail( i ) = 0
907 70 CONTINUE
908*
909* The following code handles a rare special case
910* - NZ .NE. M means that we don't have enough room to store
911* all the vectors.
912* - NSPLIT .GT. 1 means that the matrix split
913* In this case, we cannot simply take the first NZ eigenvalues
914* because PSSTEBZ sorts the eigenvalues by block when
915* a split occurs. So, we have to make another call to
916* PSSTEBZ with a new upper limit - VUU.
917*
918 IF( nsplit.GT.1 ) THEN
919 CALL slasrt( 'I', m, w, iinfo )
920 nzz = 0
921 IF( nz.GT.0 ) THEN
922*
923 vuu = w( nz ) - ten*( eps*anrm+safmin )
924 IF( vll.GE.vuu ) THEN
925 nzz = 0
926 ELSE
927 CALL psstebz( ictxt, range, order, n, vll, vuu, il,
928 $ iu, abstll, work( indd2 ),
929 $ work( inde2+offset ), nzz, nsplit, w,
930 $ iwork( indibl ), iwork( indisp ),
931 $ work( indwork ), llwork, iwork( 1 ),
932 $ isizestebz, iinfo )
933 END IF
934*
935 IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
936 IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
937 info = info + ierrebz
938 END IF
939 END IF
940 END IF
941 nz = min( nz, nzz )
942*
943 END IF
944 END IF
945 CALL psstein( n, work( indd2 ), work( inde2+offset ), nz, w,
946 $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
947 $ jz, descz, work( indwork ), lallwork, iwork( 1 ),
948 $ isizestein, ifail, iclustr, gap, iinfo )
949*
950 IF( iinfo.GE.nz+1 )
951 $ info = info + ierrcls
952 IF( mod( iinfo, nz+1 ).NE.0 )
953 $ info = info + ierrein
954*
955* Z = Q * Z
956*
957*
958 IF( nz.GT.0 ) THEN
959 CALL psormtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
960 $ work( indtau ), z, iz, jz, descz,
961 $ work( indwork ), llwork, iinfo )
962 END IF
963*
964 END IF
965*
966* If matrix was scaled, then rescale eigenvalues appropriately.
967*
968 IF( iscale.EQ.1 ) THEN
969 CALL sscal( m, one / sigma, w, 1 )
970 END IF
971*
972 work( 1 ) = real( lwopt )
973 iwork( 1 ) = liwmin
974*
975 RETURN
976*
977* End of PSSYEVX
978*
979 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2
subroutine pslared1d(n, ia, ja, desc, bycol, byall, work, lwork)
Definition pslared1d.f:2
subroutine pslascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pslascl.f:3
subroutine psormtr(side, uplo, trans, m, n, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition psormtr.f:3
subroutine psstebz(ictxt, range, order, n, vl, vu, il, iu, abstol, d, e, m, nsplit, w, iblock, isplit, work, lwork, iwork, liwork, info)
Definition psstebz.f:4
subroutine psstein(n, d, e, m, w, iblock, isplit, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition psstein.f:4
subroutine pssyevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pssyevx.f:5
subroutine pssyntrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pssyntrd.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2