SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pssygvx.f
Go to the documentation of this file.
1 SUBROUTINE pssygvx( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
2 $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
3 $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
4 $ WORK, LWORK, IWORK, LIWORK, IFAIL, ICLUSTR,
5 $ GAP, INFO )
6*
7* -- ScaLAPACK routine (version 1.7) --
8* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9* and University of California, Berkeley.
10* October 15, 1999
11*
12* .. Scalar Arguments ..
13 CHARACTER JOBZ, RANGE, UPLO
14 INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15 $ LIWORK, LWORK, M, N, NZ
16 REAL ABSTOL, ORFAC, VL, VU
17* ..
18* .. Array Arguments ..
19*
20 INTEGER DESCA( * ), DESCB( * ), DESCZ( * ),
21 $ ICLUSTR( * ), IFAIL( * ), IWORK( * )
22 REAL A( * ), B( * ), GAP( * ), W( * ), WORK( * ),
23 $ Z( * )
24* ..
25*
26* Purpose
27*
28* =======
29*
30* PSSYGVX computes all the eigenvalues, and optionally,
31* the eigenvectors
32* of a real generalized SY-definite eigenproblem, of the form
33* sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or
34* sub( B )*sub( A )*x=(lambda)*x.
35* Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36* SY, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37* to be symmetric positive definite.
38*
39* Notes
40* =====
41*
42*
43* Each global data object is described by an associated description
44* vector. This vector stores the information required to establish
45* the mapping between an object element and its corresponding process
46* and memory location.
47*
48* Let A be a generic term for any 2D block cyclicly distributed array.
49* Such a global array has an associated description vector DESCA.
50* In the following comments, the character _ should be read as
51* "of the global array".
52*
53* NOTATION STORED IN EXPLANATION
54* --------------- -------------- --------------------------------------
55* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
56* DTYPE_A = 1.
57* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58* the BLACS process grid A is distribu-
59* ted over. The context itself is glo-
60* bal, but the handle (the integer
61* value) may vary.
62* M_A (global) DESCA( M_ ) The number of rows in the global
63* array A.
64* N_A (global) DESCA( N_ ) The number of columns in the global
65* array A.
66* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
67* the rows of the array.
68* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
69* the columns of the array.
70* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71* row of the array A is distributed.
72* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73* first column of the array A is
74* distributed.
75* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
76* array. LLD_A >= MAX(1,LOCr(M_A)).
77*
78* Let K be the number of rows or columns of a distributed matrix,
79* and assume that its process grid has dimension p x q.
80* LOCr( K ) denotes the number of elements of K that a process
81* would receive if K were distributed over the p processes of its
82* process column.
83* Similarly, LOCc( K ) denotes the number of elements of K that a
84* process would receive if K were distributed over the q processes of
85* its process row.
86* The values of LOCr() and LOCc() may be determined via a call to the
87* ScaLAPACK tool function, NUMROC:
88* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90* An upper bound for these quantities may be computed by:
91* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94*
95* Arguments
96* =========
97*
98* IBTYPE (global input) INTEGER
99* Specifies the problem type to be solved:
100* = 1: sub( A )*x = (lambda)*sub( B )*x
101* = 2: sub( A )*sub( B )*x = (lambda)*x
102* = 3: sub( B )*sub( A )*x = (lambda)*x
103*
104* JOBZ (global input) CHARACTER*1
105* = 'N': Compute eigenvalues only;
106* = 'V': Compute eigenvalues and eigenvectors.
107*
108* RANGE (global input) CHARACTER*1
109* = 'A': all eigenvalues will be found.
110* = 'V': all eigenvalues in the interval [VL,VU] will be found.
111* = 'I': the IL-th through IU-th eigenvalues will be found.
112*
113* UPLO (global input) CHARACTER*1
114* = 'U': Upper triangles of sub( A ) and sub( B ) are stored;
115* = 'L': Lower triangles of sub( A ) and sub( B ) are stored.
116*
117* N (global input) INTEGER
118* The order of the matrices sub( A ) and sub( B ). N >= 0.
119*
120* A (local input/local output) REAL pointer into the
121* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122* On entry, this array contains the local pieces of the
123* N-by-N symmetric distributed matrix sub( A ). If UPLO = 'U',
124* the leading N-by-N upper triangular part of sub( A ) contains
125* the upper triangular part of the matrix. If UPLO = 'L', the
126* leading N-by-N lower triangular part of sub( A ) contains
127* the lower triangular part of the matrix.
128*
129* On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130* the distributed matrix Z of eigenvectors. The eigenvectors
131* are normalized as follows:
132* if IBTYPE = 1 or 2, Z**T*sub( B )*Z = I;
133* if IBTYPE = 3, Z**T*inv( sub( B ) )*Z = I.
134* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135* or the lower triangle (if UPLO='L') of sub( A ), including
136* the diagonal, is destroyed.
137*
138* IA (global input) INTEGER
139* The row index in the global array A indicating the first
140* row of sub( A ).
141*
142* JA (global input) INTEGER
143* The column index in the global array A indicating the
144* first column of sub( A ).
145*
146* DESCA (global and local input) INTEGER array of dimension DLEN_.
147* The array descriptor for the distributed matrix A.
148* If DESCA( CTXT_ ) is incorrect, PSSYGVX cannot guarantee
149* correct error reporting.
150*
151* B (local input/local output) REAL pointer into the
152* local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153* On entry, this array contains the local pieces of the
154* N-by-N symmetric distributed matrix sub( B ). If UPLO = 'U',
155* the leading N-by-N upper triangular part of sub( B ) contains
156* the upper triangular part of the matrix. If UPLO = 'L', the
157* leading N-by-N lower triangular part of sub( B ) contains
158* the lower triangular part of the matrix.
159*
160* On exit, if INFO <= N, the part of sub( B ) containing the
161* matrix is overwritten by the triangular factor U or L from
162* the Cholesky factorization sub( B ) = U**T*U or
163* sub( B ) = L*L**T.
164*
165* IB (global input) INTEGER
166* The row index in the global array B indicating the first
167* row of sub( B ).
168*
169* JB (global input) INTEGER
170* The column index in the global array B indicating the
171* first column of sub( B ).
172*
173* DESCB (global and local input) INTEGER array of dimension DLEN_.
174* The array descriptor for the distributed matrix B.
175* DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176*
177* VL (global input) REAL
178* If RANGE='V', the lower bound of the interval to be searched
179* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
180*
181* VU (global input) REAL
182* If RANGE='V', the upper bound of the interval to be searched
183* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
184*
185* IL (global input) INTEGER
186* If RANGE='I', the index (from smallest to largest) of the
187* smallest eigenvalue to be returned. IL >= 1.
188* Not referenced if RANGE = 'A' or 'V'.
189*
190* IU (global input) INTEGER
191* If RANGE='I', the index (from smallest to largest) of the
192* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
193* Not referenced if RANGE = 'A' or 'V'.
194*
195* ABSTOL (global input) REAL
196* If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
197* the most orthogonal eigenvectors.
198*
199* The absolute error tolerance for the eigenvalues.
200* An approximate eigenvalue is accepted as converged
201* when it is determined to lie in an interval [a,b]
202* of width less than or equal to
203*
204* ABSTOL + EPS * max( |a|,|b| ) ,
205*
206* where EPS is the machine precision. If ABSTOL is less than
207* or equal to zero, then EPS*norm(T) will be used in its place,
208* where norm(T) is the 1-norm of the tridiagonal matrix
209* obtained by reducing A to tridiagonal form.
210*
211* Eigenvalues will be computed most accurately when ABSTOL is
212* set to twice the underflow threshold 2*PSLAMCH('S') not zero.
213* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215* eigenvectors did not converge, try setting ABSTOL to
216* 2*PSLAMCH('S').
217*
218* See "Computing Small Singular Values of Bidiagonal Matrices
219* with Guaranteed High Relative Accuracy," by Demmel and
220* Kahan, LAPACK Working Note #3.
221*
222* See "On the correctness of Parallel Bisection in Floating
223* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224*
225* M (global output) INTEGER
226* Total number of eigenvalues found. 0 <= M <= N.
227*
228* NZ (global output) INTEGER
229* Total number of eigenvectors computed. 0 <= NZ <= M.
230* The number of columns of Z that are filled.
231* If JOBZ .NE. 'V', NZ is not referenced.
232* If JOBZ .EQ. 'V', NZ = M unless the user supplies
233* insufficient space and PSSYGVX is not able to detect this
234* before beginning computation. To get all the eigenvectors
235* requested, the user must supply both sufficient
236* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237* and sufficient workspace to compute them. (See LWORK below.)
238* PSSYGVX is always able to detect insufficient space without
239* computation unless RANGE .EQ. 'V'.
240*
241* W (global output) REAL array, dimension (N)
242* On normal exit, the first M entries contain the selected
243* eigenvalues in ascending order.
244*
245* ORFAC (global input) REAL
246* Specifies which eigenvectors should be reorthogonalized.
247* Eigenvectors that correspond to eigenvalues which are within
248* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249* However, if the workspace is insufficient (see LWORK),
250* tol may be decreased until all eigenvectors to be
251* reorthogonalized can be stored in one process.
252* No reorthogonalization will be done if ORFAC equals zero.
253* A default value of 10^-3 is used if ORFAC is negative.
254* ORFAC should be identical on all processes.
255*
256* Z (local output) REAL array,
257* global dimension (N, N),
258* local dimension ( LLD_Z, LOCc(JZ+N-1) )
259* If JOBZ = 'V', then on normal exit the first M columns of Z
260* contain the orthonormal eigenvectors of the matrix
261* corresponding to the selected eigenvalues. If an eigenvector
262* fails to converge, then that column of Z contains the latest
263* approximation to the eigenvector, and the index of the
264* eigenvector is returned in IFAIL.
265* If JOBZ = 'N', then Z is not referenced.
266*
267* IZ (global input) INTEGER
268* The row index in the global array Z indicating the first
269* row of sub( Z ).
270*
271* JZ (global input) INTEGER
272* The column index in the global array Z indicating the
273* first column of sub( Z ).
274*
275* DESCZ (global and local input) INTEGER array of dimension DLEN_.
276* The array descriptor for the distributed matrix Z.
277* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278*
279* WORK (local workspace/output) REAL array,
280* dimension max(3,LWORK)
281* if JOBZ='N' WORK(1) = optimal amount of workspace
282* required to compute eigenvalues efficiently
283* if JOBZ='V' WORK(1) = optimal amount of workspace
284* required to compute eigenvalues and eigenvectors
285* efficiently with no guarantee on orthogonality.
286* If RANGE='V', it is assumed that all eigenvectors
287* may be required.
288*
289* LWORK (local input) INTEGER
290* See below for definitions of variables used to define LWORK.
291* If no eigenvectors are requested (JOBZ = 'N') then
292* LWORK >= 5 * N + MAX( 5 * NN, NB * ( NP0 + 1 ) )
293* If eigenvectors are requested (JOBZ = 'V' ) then
294* the amount of workspace required to guarantee that all
295* eigenvectors are computed is:
296* LWORK >= 5 * N + MAX( 5*NN, NP0 * MQ0 + 2 * NB * NB ) +
297* ICEIL( NEIG, NPROW*NPCOL)*NN
298*
299* The computed eigenvectors may not be orthogonal if the
300* minimal workspace is supplied and ORFAC is too small.
301* If you want to guarantee orthogonality (at the cost
302* of potentially poor performance) you should add
303* the following to LWORK:
304* (CLUSTERSIZE-1)*N
305* where CLUSTERSIZE is the number of eigenvalues in the
306* largest cluster, where a cluster is defined as a set of
307* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
308* W(J+1) <= W(J) + ORFAC*2*norm(A) }
309* Variable definitions:
310* NEIG = number of eigenvectors requested
311* NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
312* DESCZ( NB_ )
313* NN = MAX( N, NB, 2 )
314* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
315* DESCZ( CSRC_ ) = 0
316* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
317* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
318* ICEIL( X, Y ) is a ScaLAPACK function returning
319* ceiling(X/Y)
320*
321* When LWORK is too small:
322* If LWORK is too small to guarantee orthogonality,
323* PSSYGVX attempts to maintain orthogonality in
324* the clusters with the smallest
325* spacing between the eigenvalues.
326* If LWORK is too small to compute all the eigenvectors
327* requested, no computation is performed and INFO=-23
328* is returned. Note that when RANGE='V', PSSYGVX does
329* not know how many eigenvectors are requested until
330* the eigenvalues are computed. Therefore, when RANGE='V'
331* and as long as LWORK is large enough to allow PSSYGVX to
332* compute the eigenvalues, PSSYGVX will compute the
333* eigenvalues and as many eigenvectors as it can.
334*
335* Relationship between workspace, orthogonality & performance:
336* Greater performance can be achieved if adequate workspace
337* is provided. On the other hand, in some situations,
338* performance can decrease as the workspace provided
339* increases above the workspace amount shown below:
340*
341* For optimal performance, greater workspace may be
342* needed, i.e.
343* LWORK >= MAX( LWORK, 5 * N + NSYTRD_LWOPT,
344* NSYGST_LWOPT )
345* Where:
346* LWORK, as defined previously, depends upon the number
347* of eigenvectors requested, and
348* NSYTRD_LWOPT = N + 2*( ANB+1 )*( 4*NPS+2 ) +
349* ( NPS + 3 ) * NPS
350* NSYGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
351*
352* ANB = PJLAENV( DESCA( CTXT_), 3, 'PSSYTTRD', 'L',
353* 0, 0, 0, 0)
354* SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
355* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
356* NB = DESCA( MB_ )
357* NP0 = NUMROC( N, NB, 0, 0, NPROW )
358* NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
359*
360* NUMROC is a ScaLAPACK tool functions;
361* PJLAENV is a ScaLAPACK envionmental inquiry function
362* MYROW, MYCOL, NPROW and NPCOL can be determined by
363* calling the subroutine BLACS_GRIDINFO.
364*
365* For large N, no extra workspace is needed, however the
366* biggest boost in performance comes for small N, so it
367* is wise to provide the extra workspace (typically less
368* than a Megabyte per process).
369*
370* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
371* enough space to compute all the eigenvectors
372* orthogonally will cause serious degradation in
373* performance. In the limit (i.e. CLUSTERSIZE = N-1)
374* PSSTEIN will perform no better than SSTEIN on 1 processor.
375* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
376* all eigenvectors will increase the total execution time
377* by a factor of 2 or more.
378* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
379* grow as the square of the cluster size, all other factors
380* remaining equal and assuming enough workspace. Less
381* workspace means less reorthogonalization but faster
382* execution.
383*
384* If LWORK = -1, then LWORK is global input and a workspace
385* query is assumed; the routine only calculates the size
386* required for optimal performance on all work arrays.
387* Each of these values is returned in the first entry of the
388* corresponding work array, and no error message is issued by
389* PXERBLA.
390*
391*
392* IWORK (local workspace) INTEGER array
393* On return, IWORK(1) contains the amount of integer workspace
394* required.
395*
396* LIWORK (local input) INTEGER
397* size of IWORK
398* LIWORK >= 6 * NNP
399* Where:
400* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
401*
402* If LIWORK = -1, then LIWORK is global input and a workspace
403* query is assumed; the routine only calculates the minimum
404* and optimal size for all work arrays. Each of these
405* values is returned in the first entry of the corresponding
406* work array, and no error message is issued by PXERBLA.
407*
408* IFAIL (output) INTEGER array, dimension (N)
409* IFAIL provides additional information when INFO .NE. 0
410* If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
411* the smallest minor which is not positive definite.
412* If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
413* indices of the eigenvectors that failed to converge.
414*
415* If neither of the above error conditions hold and JOBZ = 'V',
416* then the first M elements of IFAIL are set to zero.
417*
418* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
419* This array contains indices of eigenvectors corresponding to
420* a cluster of eigenvalues that could not be reorthogonalized
421* due to insufficient workspace (see LWORK, ORFAC and INFO).
422* Eigenvectors corresponding to clusters of eigenvalues indexed
423* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
424* reorthogonalized due to lack of workspace. Hence the
425* eigenvectors corresponding to these clusters may not be
426* orthogonal. ICLUSTR() is a zero terminated array.
427* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
428* K is the number of clusters
429* ICLUSTR is not referenced if JOBZ = 'N'
430*
431* GAP (global output) REAL array,
432* dimension (NPROW*NPCOL)
433* This array contains the gap between eigenvalues whose
434* eigenvectors could not be reorthogonalized. The output
435* values in this array correspond to the clusters indicated
436* by the array ICLUSTR. As a result, the dot product between
437* eigenvectors correspoding to the I^th cluster may be as high
438* as ( C * n ) / GAP(I) where C is a small constant.
439*
440* INFO (global output) INTEGER
441* = 0: successful exit
442* < 0: If the i-th argument is an array and the j-entry had
443* an illegal value, then INFO = -(i*100+j), if the i-th
444* argument is a scalar and had an illegal value, then
445* INFO = -i.
446* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
447* failed to converge. Their indices are stored
448* in IFAIL. Send e-mail to scalapack@cs.utk.edu
449* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
450* to one or more clusters of eigenvalues could not be
451* reorthogonalized because of insufficient workspace.
452* The indices of the clusters are stored in the array
453* ICLUSTR.
454* if (MOD(INFO/4,2).NE.0), then space limit prevented
455* PSSYGVX from computing all of the eigenvectors
456* between VL and VU. The number of eigenvectors
457* computed is returned in NZ.
458* if (MOD(INFO/8,2).NE.0), then PSSTEBZ failed to
459* compute eigenvalues.
460* Send e-mail to scalapack@cs.utk.edu
461* if (MOD(INFO/16,2).NE.0), then B was not positive
462* definite. IFAIL(1) indicates the order of
463* the smallest minor which is not positive definite.
464*
465* Alignment requirements
466* ======================
467*
468* The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
469* and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
470* namely the following expressions should be true:
471*
472* DESCA(MB_) = DESCA(NB_)
473* IA = IB = IZ
474* JA = IB = JZ
475* DESCA(M_) = DESCB(M_) =DESCZ(M_)
476* DESCA(N_) = DESCB(N_)= DESCZ(N_)
477* DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
478* DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
479* DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
480* DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
481* MOD( IA-1, DESCA( MB_ ) ) = 0
482* MOD( JA-1, DESCA( NB_ ) ) = 0
483* MOD( IB-1, DESCB( MB_ ) ) = 0
484* MOD( JB-1, DESCB( NB_ ) ) = 0
485*
486* =====================================================================
487*
488* .. Parameters ..
489 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
490 $ MB_, NB_, RSRC_, CSRC_, LLD_
491 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
492 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
493 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
494 REAL ONE
495 parameter( one = 1.0e+0 )
496 REAL FIVE, ZERO
497 PARAMETER ( FIVE = 5.0e+0, zero = 0.0e+0 )
498 INTEGER IERRNPD
499 parameter( ierrnpd = 16 )
500* ..
501* .. Local Scalars ..
502 LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
503 CHARACTER TRANS
504 INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
505 $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LWMIN,
506 $ lwopt, mq0, mycol, myrow, nb, neig, nn, np0,
507 $ npcol, nprow, nps, nq0, nsygst_lwopt,
508 $ nsytrd_lwopt, sqnpc
509 REAL EPS, SCALE
510* ..
511* .. Local Arrays ..
512 INTEGER IDUM1( 5 ), IDUM2( 5 )
513* ..
514* .. External Functions ..
515 LOGICAL LSAME
516 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
517 REAL PSLAMCH
518 EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PSLAMCH
519* ..
520* .. External Subroutines ..
521 EXTERNAL blacs_gridinfo, chk1mat, pchk1mat, pchk2mat,
522 $ pspotrf, pssyevx, pssyngst, pstrmm, pstrsm,
523 $ pxerbla, sgebr2d, sgebs2d, sscal
524* ..
525* .. Intrinsic Functions ..
526 INTRINSIC abs, dble, ichar, int, max, min, mod, real,
527 $ sqrt
528* ..
529* .. Executable Statements ..
530* This is just to keep ftnchek and toolpack/1 happy
531 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
532 $ rsrc_.LT.0 )RETURN
533*
534* Get grid parameters
535*
536 ictxt = desca( ctxt_ )
537 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
538*
539* Test the input parameters
540*
541 info = 0
542 IF( nprow.EQ.-1 ) THEN
543 info = -( 900+ctxt_ )
544 ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
545 info = -( 1300+ctxt_ )
546 ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
547 info = -( 2600+ctxt_ )
548 ELSE
549*
550* Get machine constants.
551*
552 eps = pslamch( desca( ctxt_ ), 'Precision' )
553*
554 wantz = lsame( jobz, 'V' )
555 upper = lsame( uplo, 'U' )
556 alleig = lsame( range, 'A' )
557 valeig = lsame( range, 'V' )
558 indeig = lsame( range, 'I' )
559 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 9, info )
560 CALL chk1mat( n, 4, n, 4, ib, jb, descb, 13, info )
561 CALL chk1mat( n, 4, n, 4, iz, jz, descz, 26, info )
562 IF( info.EQ.0 ) THEN
563 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
564 work( 1 ) = abstol
565 IF( valeig ) THEN
566 work( 2 ) = vl
567 work( 3 ) = vu
568 ELSE
569 work( 2 ) = zero
570 work( 3 ) = zero
571 END IF
572 CALL sgebs2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, work, 3 )
573 ELSE
574 CALL sgebr2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, work, 3,
575 $ 0, 0 )
576 END IF
577 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
578 $ nprow )
579 ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
580 $ nprow )
581 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
582 $ npcol )
583 ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
584 $ npcol )
585 iroffa = mod( ia-1, desca( mb_ ) )
586 icoffa = mod( ja-1, desca( nb_ ) )
587 iroffb = mod( ib-1, descb( mb_ ) )
588 icoffb = mod( jb-1, descb( nb_ ) )
589*
590* Compute the total amount of space needed
591*
592 lquery = .false.
593 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 )
594 $ lquery = .true.
595*
596 liwmin = 6*max( n, ( nprow*npcol )+1, 4 )
597*
598 nb = desca( mb_ )
599 nn = max( n, nb, 2 )
600 np0 = numroc( nn, nb, 0, 0, nprow )
601*
602 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
603 $ THEN
604 lwmin = 5*n + max( 5*nn, nb*( np0+1 ) )
605 IF( wantz ) THEN
606 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
607 lwopt = 5*n + max( 5*nn, np0*mq0+2*nb*nb )
608 ELSE
609 lwopt = lwmin
610 END IF
611 neig = 0
612 ELSE
613 IF( alleig .OR. valeig ) THEN
614 neig = n
615 ELSE IF( indeig ) THEN
616 neig = iu - il + 1
617 END IF
618 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
619 lwmin = 5*n + max( 5*nn, np0*mq0+2*nb*nb ) +
620 $ iceil( neig, nprow*npcol )*nn
621 lwopt = lwmin
622*
623 END IF
624*
625* Compute how much workspace is needed to use the
626* new TRD and GST algorithms
627*
628 anb = pjlaenv( ictxt, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
629 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
630 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
631 nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
632 nb = desca( mb_ )
633 np0 = numroc( n, nb, 0, 0, nprow )
634 nq0 = numroc( n, nb, 0, 0, npcol )
635 nsygst_lwopt = 2*np0*nb + nq0*nb + nb*nb
636 lwopt = max( lwopt, n+nsytrd_lwopt, nsygst_lwopt )
637*
638* Version 1.0 Limitations
639*
640 IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
641 info = -1
642 ELSE IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
643 info = -2
644 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
645 info = -3
646 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
647 info = -4
648 ELSE IF( n.LT.0 ) THEN
649 info = -5
650 ELSE IF( iroffa.NE.0 ) THEN
651 info = -7
652 ELSE IF( icoffa.NE.0 ) THEN
653 info = -8
654 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
655 info = -( 900+nb_ )
656 ELSE IF( desca( m_ ).NE.descb( m_ ) ) THEN
657 info = -( 1300+m_ )
658 ELSE IF( desca( n_ ).NE.descb( n_ ) ) THEN
659 info = -( 1300+n_ )
660 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
661 info = -( 1300+mb_ )
662 ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
663 info = -( 1300+nb_ )
664 ELSE IF( desca( rsrc_ ).NE.descb( rsrc_ ) ) THEN
665 info = -( 1300+rsrc_ )
666 ELSE IF( desca( csrc_ ).NE.descb( csrc_ ) ) THEN
667 info = -( 1300+csrc_ )
668 ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
669 info = -( 1300+ctxt_ )
670 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
671 info = -( 2200+m_ )
672 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
673 info = -( 2200+n_ )
674 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
675 info = -( 2200+mb_ )
676 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
677 info = -( 2200+nb_ )
678 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
679 info = -( 2200+rsrc_ )
680 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
681 info = -( 2200+csrc_ )
682 ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
683 info = -( 2200+ctxt_ )
684 ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
685 info = -11
686 ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
687 info = -12
688 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
689 info = -15
690 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
691 $ THEN
692 info = -16
693 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
694 $ THEN
695 info = -17
696 ELSE IF( valeig .AND. ( abs( work( 2 )-vl ).GT.five*eps*
697 $ abs( vl ) ) ) THEN
698 info = -14
699 ELSE IF( valeig .AND. ( abs( work( 3 )-vu ).GT.five*eps*
700 $ abs( vu ) ) ) THEN
701 info = -15
702 ELSE IF( abs( work( 1 )-abstol ).GT.five*eps*abs( abstol ) )
703 $ THEN
704 info = -18
705 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
706 info = -28
707 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
708 info = -30
709 END IF
710 END IF
711 idum1( 1 ) = ibtype
712 idum2( 1 ) = 1
713 IF( wantz ) THEN
714 idum1( 2 ) = ichar( 'V' )
715 ELSE
716 idum1( 2 ) = ichar( 'N' )
717 END IF
718 idum2( 2 ) = 2
719 IF( upper ) THEN
720 idum1( 3 ) = ichar( 'U' )
721 ELSE
722 idum1( 3 ) = ichar( 'L' )
723 END IF
724 idum2( 3 ) = 3
725 IF( alleig ) THEN
726 idum1( 4 ) = ichar( 'A' )
727 ELSE IF( indeig ) THEN
728 idum1( 4 ) = ichar( 'I' )
729 ELSE
730 idum1( 4 ) = ichar( 'V' )
731 END IF
732 idum2( 4 ) = 4
733 IF( lquery ) THEN
734 idum1( 5 ) = -1
735 ELSE
736 idum1( 5 ) = 1
737 END IF
738 idum2( 5 ) = 5
739 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 9, n, 4, n, 4, ib,
740 $ jb, descb, 13, 5, idum1, idum2, info )
741 CALL pchk1mat( n, 4, n, 4, iz, jz, descz, 26, 0, idum1, idum2,
742 $ info )
743 END IF
744*
745 iwork( 1 ) = liwmin
746 work( 1 ) = real( lwopt )
747*
748 IF( info.NE.0 ) THEN
749 CALL pxerbla( ictxt, 'PSSYGVX ', -info )
750 RETURN
751 ELSE IF( lquery ) THEN
752 RETURN
753 END IF
754*
755* Form a Cholesky factorization of sub( B ).
756*
757 CALL pspotrf( uplo, n, b, ib, jb, descb, info )
758 IF( info.NE.0 ) THEN
759 iwork( 1 ) = liwmin
760 work( 1 ) = real( lwopt )
761 ifail( 1 ) = info
762 info = ierrnpd
763 RETURN
764 END IF
765*
766* Transform problem to standard eigenvalue problem and solve.
767*
768 CALL pssyngst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
769 $ descb, scale, work, lwork, info )
770 CALL pssyevx( jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il,
771 $ iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work,
772 $ lwork, iwork, liwork, ifail, iclustr, gap, info )
773*
774 IF( wantz ) THEN
775*
776* Backtransform eigenvectors to the original problem.
777*
778 neig = m
779 IF( ibtype.EQ.1 .OR. ibtype.EQ.2 ) THEN
780*
781* For sub( A )*x=(lambda)*sub( B )*x and
782* sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
783* x = inv(L)'*y or inv(U)*y
784*
785 IF( upper ) THEN
786 trans = 'N'
787 ELSE
788 trans = 'T'
789 END IF
790*
791 CALL pstrsm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
792 $ b, ib, jb, descb, z, iz, jz, descz )
793*
794 ELSE IF( ibtype.EQ.3 ) THEN
795*
796* For sub( B )*sub( A )*x=(lambda)*x;
797* backtransform eigenvectors: x = L*y or U'*y
798*
799 IF( upper ) THEN
800 trans = 'T'
801 ELSE
802 trans = 'N'
803 END IF
804*
805 CALL pstrmm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
806 $ b, ib, jb, descb, z, iz, jz, descz )
807 END IF
808 END IF
809*
810 IF( scale.NE.one ) THEN
811 CALL sscal( n, scale, w, 1 )
812 END IF
813*
814 iwork( 1 ) = liwmin
815 work( 1 ) = real( lwopt )
816 RETURN
817*
818* End of PSSYGVX
819*
820 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pspotrf(uplo, n, a, ia, ja, desca, info)
Definition pspotrf.f:2
subroutine pssyevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pssyevx.f:5
subroutine pssygvx(ibtype, jobz, range, uplo, n, a, ia, ja, desca, b, ib, jb, descb, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pssygvx.f:6
subroutine pssyngst(ibtype, uplo, n, a, ia, ja, desca, b, ib, jb, descb, scale, work, lwork, info)
Definition pssyngst.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2