SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pchegvx.f
Go to the documentation of this file.
1 SUBROUTINE pchegvx( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
2 $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
3 $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
4 $ WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK,
5 $ IFAIL, ICLUSTR, GAP, INFO )
6*
7* -- ScaLAPACK routine (version 1.7) --
8* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9* and University of California, Berkeley.
10* October 15, 1999
11*
12* .. Scalar Arguments ..
13 CHARACTER JOBZ, RANGE, UPLO
14 INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15 $ LIWORK, LRWORK, LWORK, M, N, NZ
16 REAL ABSTOL, ORFAC, VL, VU
17* ..
18* .. Array Arguments ..
19*
20 INTEGER DESCA( * ), DESCB( * ), DESCZ( * ),
21 $ ICLUSTR( * ), IFAIL( * ), IWORK( * )
22 REAL GAP( * ), RWORK( * ), W( * )
23 COMPLEX A( * ), B( * ), WORK( * ), Z( * )
24* ..
25*
26* Purpose
27*
28* =======
29*
30* PCHEGVX computes all the eigenvalues, and optionally,
31* the eigenvectors
32* of a complex generalized Hermitian-definite eigenproblem, of the form
33* sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or
34* sub( B )*sub( A )*x=(lambda)*x.
35* Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36* Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37* to be Hermitian positive definite.
38*
39* Notes
40* =====
41*
42*
43* Each global data object is described by an associated description
44* vector. This vector stores the information required to establish
45* the mapping between an object element and its corresponding process
46* and memory location.
47*
48* Let A be a generic term for any 2D block cyclicly distributed array.
49* Such a global array has an associated description vector DESCA.
50* In the following comments, the character _ should be read as
51* "of the global array".
52*
53* NOTATION STORED IN EXPLANATION
54* --------------- -------------- --------------------------------------
55* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
56* DTYPE_A = 1.
57* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58* the BLACS process grid A is distribu-
59* ted over. The context itself is glo-
60* bal, but the handle (the integer
61* value) may vary.
62* M_A (global) DESCA( M_ ) The number of rows in the global
63* array A.
64* N_A (global) DESCA( N_ ) The number of columns in the global
65* array A.
66* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
67* the rows of the array.
68* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
69* the columns of the array.
70* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71* row of the array A is distributed.
72* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73* first column of the array A is
74* distributed.
75* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
76* array. LLD_A >= MAX(1,LOCr(M_A)).
77*
78* Let K be the number of rows or columns of a distributed matrix,
79* and assume that its process grid has dimension p x q.
80* LOCr( K ) denotes the number of elements of K that a process
81* would receive if K were distributed over the p processes of its
82* process column.
83* Similarly, LOCc( K ) denotes the number of elements of K that a
84* process would receive if K were distributed over the q processes of
85* its process row.
86* The values of LOCr() and LOCc() may be determined via a call to the
87* ScaLAPACK tool function, NUMROC:
88* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90* An upper bound for these quantities may be computed by:
91* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93*
94*
95* Arguments
96* =========
97*
98* IBTYPE (global input) INTEGER
99* Specifies the problem type to be solved:
100* = 1: sub( A )*x = (lambda)*sub( B )*x
101* = 2: sub( A )*sub( B )*x = (lambda)*x
102* = 3: sub( B )*sub( A )*x = (lambda)*x
103*
104* JOBZ (global input) CHARACTER*1
105* = 'N': Compute eigenvalues only;
106* = 'V': Compute eigenvalues and eigenvectors.
107*
108* RANGE (global input) CHARACTER*1
109* = 'A': all eigenvalues will be found.
110* = 'V': all eigenvalues in the interval [VL,VU] will be found.
111* = 'I': the IL-th through IU-th eigenvalues will be found.
112*
113* UPLO (global input) CHARACTER*1
114* = 'U': Upper triangles of sub( A ) and sub( B ) are stored;
115* = 'L': Lower triangles of sub( A ) and sub( B ) are stored.
116*
117* N (global input) INTEGER
118* The order of the matrices sub( A ) and sub( B ). N >= 0.
119*
120* A (local input/local output) COMPLEX pointer into the
121* local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122* On entry, this array contains the local pieces of the
123* N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
124* the leading N-by-N upper triangular part of sub( A ) contains
125* the upper triangular part of the matrix. If UPLO = 'L', the
126* leading N-by-N lower triangular part of sub( A ) contains
127* the lower triangular part of the matrix.
128*
129* On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130* the distributed matrix Z of eigenvectors. The eigenvectors
131* are normalized as follows:
132* if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I;
133* if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I.
134* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135* or the lower triangle (if UPLO='L') of sub( A ), including
136* the diagonal, is destroyed.
137*
138* IA (global input) INTEGER
139* The row index in the global array A indicating the first
140* row of sub( A ).
141*
142* JA (global input) INTEGER
143* The column index in the global array A indicating the
144* first column of sub( A ).
145*
146* DESCA (global and local input) INTEGER array of dimension DLEN_.
147* The array descriptor for the distributed matrix A.
148* If DESCA( CTXT_ ) is incorrect, PCHEGVX cannot guarantee
149* correct error reporting.
150*
151* B (local input/local output) COMPLEX pointer into the
152* local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153* On entry, this array contains the local pieces of the
154* N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U',
155* the leading N-by-N upper triangular part of sub( B ) contains
156* the upper triangular part of the matrix. If UPLO = 'L', the
157* leading N-by-N lower triangular part of sub( B ) contains
158* the lower triangular part of the matrix.
159*
160* On exit, if INFO <= N, the part of sub( B ) containing the
161* matrix is overwritten by the triangular factor U or L from
162* the Cholesky factorization sub( B ) = U**H*U or
163* sub( B ) = L*L**H.
164*
165* IB (global input) INTEGER
166* The row index in the global array B indicating the first
167* row of sub( B ).
168*
169* JB (global input) INTEGER
170* The column index in the global array B indicating the
171* first column of sub( B ).
172*
173* DESCB (global and local input) INTEGER array of dimension DLEN_.
174* The array descriptor for the distributed matrix B.
175* DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176*
177* VL (global input) REAL
178* If RANGE='V', the lower bound of the interval to be searched
179* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
180*
181* VU (global input) REAL
182* If RANGE='V', the upper bound of the interval to be searched
183* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
184*
185* IL (global input) INTEGER
186* If RANGE='I', the index (from smallest to largest) of the
187* smallest eigenvalue to be returned. IL >= 1.
188* Not referenced if RANGE = 'A' or 'V'.
189*
190* IU (global input) INTEGER
191* If RANGE='I', the index (from smallest to largest) of the
192* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
193* Not referenced if RANGE = 'A' or 'V'.
194*
195* ABSTOL (global input) REAL
196* If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
197* the most orthogonal eigenvectors.
198*
199* The absolute error tolerance for the eigenvalues.
200* An approximate eigenvalue is accepted as converged
201* when it is determined to lie in an interval [a,b]
202* of width less than or equal to
203*
204* ABSTOL + EPS * max( |a|,|b| ) ,
205*
206* where EPS is the machine precision. If ABSTOL is less than
207* or equal to zero, then EPS*norm(T) will be used in its place,
208* where norm(T) is the 1-norm of the tridiagonal matrix
209* obtained by reducing A to tridiagonal form.
210*
211* Eigenvalues will be computed most accurately when ABSTOL is
212* set to twice the underflow threshold 2*PSLAMCH('S') not zero.
213* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215* eigenvectors did not converge, try setting ABSTOL to
216* 2*PSLAMCH('S').
217*
218* See "Computing Small Singular Values of Bidiagonal Matrices
219* with Guaranteed High Relative Accuracy," by Demmel and
220* Kahan, LAPACK Working Note #3.
221*
222* See "On the correctness of Parallel Bisection in Floating
223* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224*
225* M (global output) INTEGER
226* Total number of eigenvalues found. 0 <= M <= N.
227*
228* NZ (global output) INTEGER
229* Total number of eigenvectors computed. 0 <= NZ <= M.
230* The number of columns of Z that are filled.
231* If JOBZ .NE. 'V', NZ is not referenced.
232* If JOBZ .EQ. 'V', NZ = M unless the user supplies
233* insufficient space and PCHEGVX is not able to detect this
234* before beginning computation. To get all the eigenvectors
235* requested, the user must supply both sufficient
236* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237* and sufficient workspace to compute them. (See LWORK below.)
238* PCHEGVX is always able to detect insufficient space without
239* computation unless RANGE .EQ. 'V'.
240*
241* W (global output) REAL array, dimension (N)
242* On normal exit, the first M entries contain the selected
243* eigenvalues in ascending order.
244*
245* ORFAC (global input) REAL
246* Specifies which eigenvectors should be reorthogonalized.
247* Eigenvectors that correspond to eigenvalues which are within
248* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249* However, if the workspace is insufficient (see LWORK),
250* tol may be decreased until all eigenvectors to be
251* reorthogonalized can be stored in one process.
252* No reorthogonalization will be done if ORFAC equals zero.
253* A default value of 10^-3 is used if ORFAC is negative.
254* ORFAC should be identical on all processes.
255*
256* Z (local output) COMPLEX array,
257* global dimension (N, N),
258* local dimension ( LLD_Z, LOCc(JZ+N-1) )
259* If JOBZ = 'V', then on normal exit the first M columns of Z
260* contain the orthonormal eigenvectors of the matrix
261* corresponding to the selected eigenvalues. If an eigenvector
262* fails to converge, then that column of Z contains the latest
263* approximation to the eigenvector, and the index of the
264* eigenvector is returned in IFAIL.
265* If JOBZ = 'N', then Z is not referenced.
266*
267* IZ (global input) INTEGER
268* The row index in the global array Z indicating the first
269* row of sub( Z ).
270*
271* JZ (global input) INTEGER
272* The column index in the global array Z indicating the
273* first column of sub( Z ).
274*
275* DESCZ (global and local input) INTEGER array of dimension DLEN_.
276* The array descriptor for the distributed matrix Z.
277* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278*
279* WORK (local workspace/output) COMPLEX array,
280* dimension (LWORK)
281* WORK(1) returns the optimal workspace.
282*
283* LWORK (local input) INTEGER
284* Size of WORK array. If only eigenvalues are requested:
285* LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 )
286* If eigenvectors are requested:
287* LWORK >= N + ( NP0 + MQ0 + NB ) * NB
288* with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ).
289*
290* For optimal performance, greater workspace is needed, i.e.
291* LWORK >= MAX( LWORK, N + NHETRD_LWOPT,
292* NHEGST_LWOPT )
293* Where LWORK is as defined above, and
294* NHETRD_LWORK = 2*( ANB+1 )*( 4*NPS+2 ) +
295* ( NPS + 1 ) * NPS
296* NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
297*
298* NB = DESCA( MB_ )
299* NP0 = NUMROC( N, NB, 0, 0, NPROW )
300* NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
301* ICTXT = DESCA( CTXT_ )
302* ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
303* SQNPC = SQRT( DBLE( NPROW * NPCOL ) )
304* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
305*
306* NUMROC is a ScaLAPACK tool functions;
307* PJLAENV is a ScaLAPACK envionmental inquiry function
308* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
309* the subroutine BLACS_GRIDINFO.
310*
311* If LWORK = -1, then LWORK is global input and a workspace
312* query is assumed; the routine only calculates the optimal
313* size for all work arrays. Each of these values is returned
314* in the first entry of the correspondingwork array, and no
315* error message is issued by PXERBLA.
316*
317* RWORK (local workspace/output) REAL array,
318* dimension max(3,LRWORK)
319* On return, RWORK(1) contains the amount of workspace
320* required for optimal efficiency
321* if JOBZ='N' RWORK(1) = optimal amount of workspace
322* required to compute eigenvalues efficiently
323* if JOBZ='V' RWORK(1) = optimal amount of workspace
324* required to compute eigenvalues and eigenvectors
325* efficiently with no guarantee on orthogonality.
326* If RANGE='V', it is assumed that all eigenvectors
327* may be required when computing optimal workspace.
328*
329* LRWORK (local input) INTEGER
330* Size of RWORK
331* See below for definitions of variables used to define LRWORK.
332* If no eigenvectors are requested (JOBZ = 'N') then
333* LRWORK >= 5 * NN + 4 * N
334* If eigenvectors are requested (JOBZ = 'V' ) then
335* the amount of workspace required to guarantee that all
336* eigenvectors are computed is:
337* LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) +
338* ICEIL( NEIG, NPROW*NPCOL)*NN
339*
340* The computed eigenvectors may not be orthogonal if the
341* minimal workspace is supplied and ORFAC is too small.
342* If you want to guarantee orthogonality (at the cost
343* of potentially poor performance) you should add
344* the following to LRWORK:
345* (CLUSTERSIZE-1)*N
346* where CLUSTERSIZE is the number of eigenvalues in the
347* largest cluster, where a cluster is defined as a set of
348* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
349* W(J+1) <= W(J) + ORFAC*2*norm(A) }
350* Variable definitions:
351* NEIG = number of eigenvectors requested
352* NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
353* DESCZ( NB_ )
354* NN = MAX( N, NB, 2 )
355* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
356* DESCZ( CSRC_ ) = 0
357* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
358* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
359* ICEIL( X, Y ) is a ScaLAPACK function returning
360* ceiling(X/Y)
361*
362* When LRWORK is too small:
363* If LRWORK is too small to guarantee orthogonality,
364* PCHEGVX attempts to maintain orthogonality in
365* the clusters with the smallest
366* spacing between the eigenvalues.
367* If LRWORK is too small to compute all the eigenvectors
368* requested, no computation is performed and INFO=-25
369* is returned. Note that when RANGE='V', PCHEGVX does
370* not know how many eigenvectors are requested until
371* the eigenvalues are computed. Therefore, when RANGE='V'
372* and as long as LRWORK is large enough to allow PCHEGVX to
373* compute the eigenvalues, PCHEGVX will compute the
374* eigenvalues and as many eigenvectors as it can.
375*
376* Relationship between workspace, orthogonality & performance:
377* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
378* enough space to compute all the eigenvectors
379* orthogonally will cause serious degradation in
380* performance. In the limit (i.e. CLUSTERSIZE = N-1)
381* PCSTEIN will perform no better than CSTEIN on 1 processor.
382* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
383* all eigenvectors will increase the total execution time
384* by a factor of 2 or more.
385* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
386* grow as the square of the cluster size, all other factors
387* remaining equal and assuming enough workspace. Less
388* workspace means less reorthogonalization but faster
389* execution.
390*
391* If LRWORK = -1, then LRWORK is global input and a workspace
392* query is assumed; the routine only calculates the minimum
393* and optimal size for all work arrays. Each of these
394* values is returned in the first entry of the corresponding
395* work array, and no error message is issued by PXERBLA.
396*
397* IWORK (local workspace) INTEGER array
398* On return, IWORK(1) contains the amount of integer workspace
399* required.
400*
401* LIWORK (local input) INTEGER
402* size of IWORK
403* LIWORK >= 6 * NNP
404* Where:
405* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
406*
407* If LIWORK = -1, then LIWORK is global input and a workspace
408* query is assumed; the routine only calculates the minimum
409* and optimal size for all work arrays. Each of these
410* values is returned in the first entry of the corresponding
411* work array, and no error message is issued by PXERBLA.
412*
413* IFAIL (output) INTEGER array, dimension (N)
414* IFAIL provides additional information when INFO .NE. 0
415* If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
416* the smallest minor which is not positive definite.
417* If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
418* indices of the eigenvectors that failed to converge.
419*
420* If neither of the above error conditions hold and JOBZ = 'V',
421* then the first M elements of IFAIL are set to zero.
422*
423* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
424* This array contains indices of eigenvectors corresponding to
425* a cluster of eigenvalues that could not be reorthogonalized
426* due to insufficient workspace (see LWORK, ORFAC and INFO).
427* Eigenvectors corresponding to clusters of eigenvalues indexed
428* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
429* reorthogonalized due to lack of workspace. Hence the
430* eigenvectors corresponding to these clusters may not be
431* orthogonal. ICLUSTR() is a zero terminated array.
432* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
433* K is the number of clusters
434* ICLUSTR is not referenced if JOBZ = 'N'
435*
436* GAP (global output) REAL array,
437* dimension (NPROW*NPCOL)
438* This array contains the gap between eigenvalues whose
439* eigenvectors could not be reorthogonalized. The output
440* values in this array correspond to the clusters indicated
441* by the array ICLUSTR. As a result, the dot product between
442* eigenvectors correspoding to the I^th cluster may be as high
443* as ( C * n ) / GAP(I) where C is a small constant.
444*
445* INFO (global output) INTEGER
446* = 0: successful exit
447* < 0: If the i-th argument is an array and the j-entry had
448* an illegal value, then INFO = -(i*100+j), if the i-th
449* argument is a scalar and had an illegal value, then
450* INFO = -i.
451* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
452* failed to converge. Their indices are stored
453* in IFAIL. Send e-mail to scalapack@cs.utk.edu
454* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
455* to one or more clusters of eigenvalues could not be
456* reorthogonalized because of insufficient workspace.
457* The indices of the clusters are stored in the array
458* ICLUSTR.
459* if (MOD(INFO/4,2).NE.0), then space limit prevented
460* PCHEGVX from computing all of the eigenvectors
461* between VL and VU. The number of eigenvectors
462* computed is returned in NZ.
463* if (MOD(INFO/8,2).NE.0), then PCSTEBZ failed to
464* compute eigenvalues.
465* Send e-mail to scalapack@cs.utk.edu
466* if (MOD(INFO/16,2).NE.0), then B was not positive
467* definite. IFAIL(1) indicates the order of
468* the smallest minor which is not positive definite.
469*
470* Alignment requirements
471* ======================
472*
473* The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
474* and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
475* namely the following expressions should be true:
476*
477* DESCA(MB_) = DESCA(NB_)
478* IA = IB = IZ
479* JA = IB = JZ
480* DESCA(M_) = DESCB(M_) =DESCZ(M_)
481* DESCA(N_) = DESCB(N_)= DESCZ(N_)
482* DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
483* DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
484* DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
485* DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
486* MOD( IA-1, DESCA( MB_ ) ) = 0
487* MOD( JA-1, DESCA( NB_ ) ) = 0
488* MOD( IB-1, DESCB( MB_ ) ) = 0
489* MOD( JB-1, DESCB( NB_ ) ) = 0
490*
491* =====================================================================
492*
493* .. Parameters ..
494 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
495 $ MB_, NB_, RSRC_, CSRC_, LLD_
496 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
497 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
498 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
499 COMPLEX ONE
500 parameter( one = 1.0e+0 )
501 REAL FIVE, ZERO
502 PARAMETER ( FIVE = 5.0e+0, zero = 0.0e+0 )
503 INTEGER IERRNPD
504 parameter( ierrnpd = 16 )
505* ..
506* .. Local Scalars ..
507 LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
508 CHARACTER TRANS
509 INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
510 $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LRWMIN,
511 $ lrwopt, lwmin, lwopt, mq0, mycol, myrow, nb,
512 $ neig, nhegst_lwopt, nhetrd_lwopt, nn, np0,
513 $ npcol, nprow, nps, nq0, sqnpc
514 REAL EPS, SCALE
515* ..
516* .. Local Arrays ..
517 INTEGER IDUM1( 5 ), IDUM2( 5 )
518* ..
519* .. External Functions ..
520 LOGICAL LSAME
521 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
522 REAL PSLAMCH
523 EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PSLAMCH
524* ..
525* .. External Subroutines ..
526 EXTERNAL blacs_gridinfo, chk1mat, pcheevx, pchengst,
527 $ pchk1mat, pchk2mat, pcpotrf, pctrmm, pctrsm,
528 $ pxerbla, sgebr2d, sgebs2d, sscal
529* ..
530* .. Intrinsic Functions ..
531 INTRINSIC abs, cmplx, dble, ichar, int, max, min, mod,
532 $ real, sqrt
533* ..
534* .. Executable Statements ..
535* This is just to keep ftnchek and toolpack/1 happy
536 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
537 $ rsrc_.LT.0 )RETURN
538*
539* Get grid parameters
540*
541 ictxt = desca( ctxt_ )
542 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
543*
544* Test the input parameters
545*
546 info = 0
547 IF( nprow.EQ.-1 ) THEN
548 info = -( 900+ctxt_ )
549 ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
550 info = -( 1300+ctxt_ )
551 ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
552 info = -( 2600+ctxt_ )
553 ELSE
554*
555* Get machine constants.
556*
557 eps = pslamch( desca( ctxt_ ), 'Precision' )
558*
559 wantz = lsame( jobz, 'V' )
560 upper = lsame( uplo, 'U' )
561 alleig = lsame( range, 'A' )
562 valeig = lsame( range, 'V' )
563 indeig = lsame( range, 'I' )
564 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 9, info )
565 CALL chk1mat( n, 4, n, 4, ib, jb, descb, 13, info )
566 CALL chk1mat( n, 4, n, 4, iz, jz, descz, 26, info )
567 IF( info.EQ.0 ) THEN
568 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
569 rwork( 1 ) = abstol
570 IF( valeig ) THEN
571 rwork( 2 ) = vl
572 rwork( 3 ) = vu
573 ELSE
574 rwork( 2 ) = zero
575 rwork( 3 ) = zero
576 END IF
577 CALL sgebs2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, rwork,
578 $ 3 )
579 ELSE
580 CALL sgebr2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, rwork, 3,
581 $ 0, 0 )
582 END IF
583 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
584 $ nprow )
585 ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
586 $ nprow )
587 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
588 $ npcol )
589 ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
590 $ npcol )
591 iroffa = mod( ia-1, desca( mb_ ) )
592 icoffa = mod( ja-1, desca( nb_ ) )
593 iroffb = mod( ib-1, descb( mb_ ) )
594 icoffb = mod( jb-1, descb( nb_ ) )
595*
596* Compute the total amount of space needed
597*
598 lquery = .false.
599 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 .OR. lrwork.EQ.-1 )
600 $ lquery = .true.
601*
602 liwmin = 6*max( n, ( nprow*npcol )+1, 4 )
603*
604 nb = desca( mb_ )
605 nn = max( n, nb, 2 )
606 np0 = numroc( nn, nb, 0, 0, nprow )
607*
608 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
609 $ THEN
610 lwmin = n + max( nb*( np0+1 ), 3 )
611 lwopt = lwmin
612 lrwmin = 5*nn + 4*n
613 IF( wantz ) THEN
614 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
615 lrwopt = 4*n + max( 5*nn, np0*mq0 )
616 ELSE
617 lrwopt = lrwmin
618 END IF
619 neig = 0
620 ELSE
621 IF( alleig .OR. valeig ) THEN
622 neig = n
623 ELSE IF( indeig ) THEN
624 neig = iu - il + 1
625 END IF
626 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
627 lwmin = n + ( np0+mq0+nb )*nb
628 lwopt = lwmin
629 lrwmin = 4*n + max( 5*nn, np0*mq0 ) +
630 $ iceil( neig, nprow*npcol )*nn
631 lrwopt = lrwmin
632*
633 END IF
634*
635* Compute how much workspace is needed to use the
636* new TRD and GST algorithms
637*
638 anb = pjlaenv( ictxt, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
639 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
640 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
641 nhetrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
642 nb = desca( mb_ )
643 np0 = numroc( n, nb, 0, 0, nprow )
644 nq0 = numroc( n, nb, 0, 0, npcol )
645 nhegst_lwopt = 2*np0*nb + nq0*nb + nb*nb
646 lwopt = max( lwopt, n+nhetrd_lwopt, nhegst_lwopt )
647*
648* Version 1.0 Limitations
649*
650 IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
651 info = -1
652 ELSE IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
653 info = -2
654 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
655 info = -3
656 ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
657 info = -4
658 ELSE IF( n.LT.0 ) THEN
659 info = -5
660 ELSE IF( iroffa.NE.0 ) THEN
661 info = -7
662 ELSE IF( icoffa.NE.0 ) THEN
663 info = -8
664 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
665 info = -( 900+nb_ )
666 ELSE IF( desca( m_ ).NE.descb( m_ ) ) THEN
667 info = -( 1300+m_ )
668 ELSE IF( desca( n_ ).NE.descb( n_ ) ) THEN
669 info = -( 1300+n_ )
670 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
671 info = -( 1300+mb_ )
672 ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
673 info = -( 1300+nb_ )
674 ELSE IF( desca( rsrc_ ).NE.descb( rsrc_ ) ) THEN
675 info = -( 1300+rsrc_ )
676 ELSE IF( desca( csrc_ ).NE.descb( csrc_ ) ) THEN
677 info = -( 1300+csrc_ )
678 ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
679 info = -( 1300+ctxt_ )
680 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
681 info = -( 2200+m_ )
682 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
683 info = -( 2200+n_ )
684 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
685 info = -( 2200+mb_ )
686 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
687 info = -( 2200+nb_ )
688 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
689 info = -( 2200+rsrc_ )
690 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
691 info = -( 2200+csrc_ )
692 ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
693 info = -( 2200+ctxt_ )
694 ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
695 info = -11
696 ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
697 info = -12
698 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
699 info = -15
700 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
701 $ THEN
702 info = -16
703 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
704 $ THEN
705 info = -17
706 ELSE IF( valeig .AND. ( abs( rwork( 2 )-vl ).GT.five*eps*
707 $ abs( vl ) ) ) THEN
708 info = -14
709 ELSE IF( valeig .AND. ( abs( rwork( 3 )-vu ).GT.five*eps*
710 $ abs( vu ) ) ) THEN
711 info = -15
712 ELSE IF( abs( rwork( 1 )-abstol ).GT.five*eps*
713 $ abs( abstol ) ) THEN
714 info = -18
715 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
716 info = -28
717 ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
718 info = -30
719 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
720 info = -32
721 END IF
722 END IF
723 idum1( 1 ) = ibtype
724 idum2( 1 ) = 1
725 IF( wantz ) THEN
726 idum1( 2 ) = ichar( 'V' )
727 ELSE
728 idum1( 2 ) = ichar( 'N' )
729 END IF
730 idum2( 2 ) = 2
731 IF( upper ) THEN
732 idum1( 3 ) = ichar( 'U' )
733 ELSE
734 idum1( 3 ) = ichar( 'L' )
735 END IF
736 idum2( 3 ) = 3
737 IF( alleig ) THEN
738 idum1( 4 ) = ichar( 'A' )
739 ELSE IF( indeig ) THEN
740 idum1( 4 ) = ichar( 'I' )
741 ELSE
742 idum1( 4 ) = ichar( 'V' )
743 END IF
744 idum2( 4 ) = 4
745 IF( lquery ) THEN
746 idum1( 5 ) = -1
747 ELSE
748 idum1( 5 ) = 1
749 END IF
750 idum2( 5 ) = 5
751 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 9, n, 4, n, 4, ib,
752 $ jb, descb, 13, 5, idum1, idum2, info )
753 CALL pchk1mat( n, 4, n, 4, iz, jz, descz, 26, 0, idum1, idum2,
754 $ info )
755 END IF
756*
757 iwork( 1 ) = liwmin
758 work( 1 ) = cmplx( real( lwopt ) )
759 rwork( 1 ) = real( lrwopt )
760*
761 IF( info.NE.0 ) THEN
762 CALL pxerbla( ictxt, 'PCHEGVX ', -info )
763 RETURN
764 ELSE IF( lquery ) THEN
765 RETURN
766 END IF
767*
768* Form a Cholesky factorization of sub( B ).
769*
770 CALL pcpotrf( uplo, n, b, ib, jb, descb, info )
771 IF( info.NE.0 ) THEN
772 iwork( 1 ) = liwmin
773 work( 1 ) = cmplx( real( lwopt ) )
774 rwork( 1 ) = real( lrwopt )
775 ifail( 1 ) = info
776 info = ierrnpd
777 RETURN
778 END IF
779*
780* Transform problem to standard eigenvalue problem and solve.
781*
782 CALL pchengst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
783 $ descb, scale, work, lwork, info )
784 CALL pcheevx( jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il,
785 $ iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work,
786 $ lwork, rwork, lrwork, iwork, liwork, ifail, iclustr,
787 $ gap, info )
788*
789 IF( wantz ) THEN
790*
791* Backtransform eigenvectors to the original problem.
792*
793 neig = m
794 IF( ibtype.EQ.1 .OR. ibtype.EQ.2 ) THEN
795*
796* For sub( A )*x=(lambda)*sub( B )*x and
797* sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
798* x = inv(L)'*y or inv(U)*y
799*
800 IF( upper ) THEN
801 trans = 'N'
802 ELSE
803 trans = 'C'
804 END IF
805*
806 CALL pctrsm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
807 $ b, ib, jb, descb, z, iz, jz, descz )
808*
809 ELSE IF( ibtype.EQ.3 ) THEN
810*
811* For sub( B )*sub( A )*x=(lambda)*x;
812* backtransform eigenvectors: x = L*y or U'*y
813*
814 IF( upper ) THEN
815 trans = 'C'
816 ELSE
817 trans = 'N'
818 END IF
819*
820 CALL pctrmm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
821 $ b, ib, jb, descb, z, iz, jz, descz )
822 END IF
823 END IF
824*
825 IF( scale.NE.one ) THEN
826 CALL sscal( n, scale, w, 1 )
827 END IF
828*
829 iwork( 1 ) = liwmin
830 work( 1 ) = cmplx( real( lwopt ) )
831 rwork( 1 ) = real( lrwopt )
832 RETURN
833*
834* End of PCHEGVX
835*
836 END
float cmplx[2]
Definition pblas.h:136
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pcheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pcheevx.f:5
subroutine pchegvx(ibtype, jobz, range, uplo, n, a, ia, ja, desca, b, ib, jb, descb, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pchegvx.f:6
subroutine pchengst(ibtype, uplo, n, a, ia, ja, desca, b, ib, jb, descb, scale, work, lwork, info)
Definition pchengst.f:3
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pcpotrf(uplo, n, a, ia, ja, desca, info)
Definition pcpotrf.f:2
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2