ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pchegvx.f
Go to the documentation of this file.
1  SUBROUTINE pchegvx( IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA,
2  $ DESCA, B, IB, JB, DESCB, VL, VU, IL, IU,
3  $ ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ,
4  $ WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK,
5  $ IFAIL, ICLUSTR, GAP, INFO )
6 *
7 * -- ScaLAPACK routine (version 1.7) --
8 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
9 * and University of California, Berkeley.
10 * October 15, 1999
11 *
12 * .. Scalar Arguments ..
13  CHARACTER JOBZ, RANGE, UPLO
14  INTEGER IA, IB, IBTYPE, IL, INFO, IU, IZ, JA, JB, JZ,
15  $ LIWORK, LRWORK, LWORK, M, N, NZ
16  REAL ABSTOL, ORFAC, VL, VU
17 * ..
18 * .. Array Arguments ..
19 *
20  INTEGER DESCA( * ), DESCB( * ), DESCZ( * ),
21  $ ICLUSTR( * ), IFAIL( * ), IWORK( * )
22  REAL GAP( * ), RWORK( * ), W( * )
23  COMPLEX A( * ), B( * ), WORK( * ), Z( * )
24 * ..
25 *
26 * Purpose
27 *
28 * =======
29 *
30 * PCHEGVX computes all the eigenvalues, and optionally,
31 * the eigenvectors
32 * of a complex generalized Hermitian-definite eigenproblem, of the form
33 * sub( A )*x=(lambda)*sub( B )*x, sub( A )*sub( B )x=(lambda)*x, or
34 * sub( B )*sub( A )*x=(lambda)*x.
35 * Here sub( A ) denoting A( IA:IA+N-1, JA:JA+N-1 ) is assumed to be
36 * Hermitian, and sub( B ) denoting B( IB:IB+N-1, JB:JB+N-1 ) is assumed
37 * to be Hermitian positive definite.
38 *
39 * Notes
40 * =====
41 *
42 *
43 * Each global data object is described by an associated description
44 * vector. This vector stores the information required to establish
45 * the mapping between an object element and its corresponding process
46 * and memory location.
47 *
48 * Let A be a generic term for any 2D block cyclicly distributed array.
49 * Such a global array has an associated description vector DESCA.
50 * In the following comments, the character _ should be read as
51 * "of the global array".
52 *
53 * NOTATION STORED IN EXPLANATION
54 * --------------- -------------- --------------------------------------
55 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
56 * DTYPE_A = 1.
57 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
58 * the BLACS process grid A is distribu-
59 * ted over. The context itself is glo-
60 * bal, but the handle (the integer
61 * value) may vary.
62 * M_A (global) DESCA( M_ ) The number of rows in the global
63 * array A.
64 * N_A (global) DESCA( N_ ) The number of columns in the global
65 * array A.
66 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
67 * the rows of the array.
68 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
69 * the columns of the array.
70 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
71 * row of the array A is distributed.
72 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
73 * first column of the array A is
74 * distributed.
75 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
76 * array. LLD_A >= MAX(1,LOCr(M_A)).
77 *
78 * Let K be the number of rows or columns of a distributed matrix,
79 * and assume that its process grid has dimension p x q.
80 * LOCr( K ) denotes the number of elements of K that a process
81 * would receive if K were distributed over the p processes of its
82 * process column.
83 * Similarly, LOCc( K ) denotes the number of elements of K that a
84 * process would receive if K were distributed over the q processes of
85 * its process row.
86 * The values of LOCr() and LOCc() may be determined via a call to the
87 * ScaLAPACK tool function, NUMROC:
88 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
89 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
90 * An upper bound for these quantities may be computed by:
91 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
92 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
93 *
94 *
95 * Arguments
96 * =========
97 *
98 * IBTYPE (global input) INTEGER
99 * Specifies the problem type to be solved:
100 * = 1: sub( A )*x = (lambda)*sub( B )*x
101 * = 2: sub( A )*sub( B )*x = (lambda)*x
102 * = 3: sub( B )*sub( A )*x = (lambda)*x
103 *
104 * JOBZ (global input) CHARACTER*1
105 * = 'N': Compute eigenvalues only;
106 * = 'V': Compute eigenvalues and eigenvectors.
107 *
108 * RANGE (global input) CHARACTER*1
109 * = 'A': all eigenvalues will be found.
110 * = 'V': all eigenvalues in the interval [VL,VU] will be found.
111 * = 'I': the IL-th through IU-th eigenvalues will be found.
112 *
113 * UPLO (global input) CHARACTER*1
114 * = 'U': Upper triangles of sub( A ) and sub( B ) are stored;
115 * = 'L': Lower triangles of sub( A ) and sub( B ) are stored.
116 *
117 * N (global input) INTEGER
118 * The order of the matrices sub( A ) and sub( B ). N >= 0.
119 *
120 * A (local input/local output) COMPLEX pointer into the
121 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
122 * On entry, this array contains the local pieces of the
123 * N-by-N Hermitian distributed matrix sub( A ). If UPLO = 'U',
124 * the leading N-by-N upper triangular part of sub( A ) contains
125 * the upper triangular part of the matrix. If UPLO = 'L', the
126 * leading N-by-N lower triangular part of sub( A ) contains
127 * the lower triangular part of the matrix.
128 *
129 * On exit, if JOBZ = 'V', then if INFO = 0, sub( A ) contains
130 * the distributed matrix Z of eigenvectors. The eigenvectors
131 * are normalized as follows:
132 * if IBTYPE = 1 or 2, Z**H*sub( B )*Z = I;
133 * if IBTYPE = 3, Z**H*inv( sub( B ) )*Z = I.
134 * If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
135 * or the lower triangle (if UPLO='L') of sub( A ), including
136 * the diagonal, is destroyed.
137 *
138 * IA (global input) INTEGER
139 * The row index in the global array A indicating the first
140 * row of sub( A ).
141 *
142 * JA (global input) INTEGER
143 * The column index in the global array A indicating the
144 * first column of sub( A ).
145 *
146 * DESCA (global and local input) INTEGER array of dimension DLEN_.
147 * The array descriptor for the distributed matrix A.
148 * If DESCA( CTXT_ ) is incorrect, PCHEGVX cannot guarantee
149 * correct error reporting.
150 *
151 * B (local input/local output) COMPLEX pointer into the
152 * local memory to an array of dimension (LLD_B, LOCc(JB+N-1)).
153 * On entry, this array contains the local pieces of the
154 * N-by-N Hermitian distributed matrix sub( B ). If UPLO = 'U',
155 * the leading N-by-N upper triangular part of sub( B ) contains
156 * the upper triangular part of the matrix. If UPLO = 'L', the
157 * leading N-by-N lower triangular part of sub( B ) contains
158 * the lower triangular part of the matrix.
159 *
160 * On exit, if INFO <= N, the part of sub( B ) containing the
161 * matrix is overwritten by the triangular factor U or L from
162 * the Cholesky factorization sub( B ) = U**H*U or
163 * sub( B ) = L*L**H.
164 *
165 * IB (global input) INTEGER
166 * The row index in the global array B indicating the first
167 * row of sub( B ).
168 *
169 * JB (global input) INTEGER
170 * The column index in the global array B indicating the
171 * first column of sub( B ).
172 *
173 * DESCB (global and local input) INTEGER array of dimension DLEN_.
174 * The array descriptor for the distributed matrix B.
175 * DESCB( CTXT_ ) must equal DESCA( CTXT_ )
176 *
177 * VL (global input) REAL
178 * If RANGE='V', the lower bound of the interval to be searched
179 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
180 *
181 * VU (global input) REAL
182 * If RANGE='V', the upper bound of the interval to be searched
183 * for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
184 *
185 * IL (global input) INTEGER
186 * If RANGE='I', the index (from smallest to largest) of the
187 * smallest eigenvalue to be returned. IL >= 1.
188 * Not referenced if RANGE = 'A' or 'V'.
189 *
190 * IU (global input) INTEGER
191 * If RANGE='I', the index (from smallest to largest) of the
192 * largest eigenvalue to be returned. min(IL,N) <= IU <= N.
193 * Not referenced if RANGE = 'A' or 'V'.
194 *
195 * ABSTOL (global input) REAL
196 * If JOBZ='V', setting ABSTOL to PSLAMCH( CONTEXT, 'U') yields
197 * the most orthogonal eigenvectors.
198 *
199 * The absolute error tolerance for the eigenvalues.
200 * An approximate eigenvalue is accepted as converged
201 * when it is determined to lie in an interval [a,b]
202 * of width less than or equal to
203 *
204 * ABSTOL + EPS * max( |a|,|b| ) ,
205 *
206 * where EPS is the machine precision. If ABSTOL is less than
207 * or equal to zero, then EPS*norm(T) will be used in its place,
208 * where norm(T) is the 1-norm of the tridiagonal matrix
209 * obtained by reducing A to tridiagonal form.
210 *
211 * Eigenvalues will be computed most accurately when ABSTOL is
212 * set to twice the underflow threshold 2*PSLAMCH('S') not zero.
213 * If this routine returns with ((MOD(INFO,2).NE.0) .OR.
214 * (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
215 * eigenvectors did not converge, try setting ABSTOL to
216 * 2*PSLAMCH('S').
217 *
218 * See "Computing Small Singular Values of Bidiagonal Matrices
219 * with Guaranteed High Relative Accuracy," by Demmel and
220 * Kahan, LAPACK Working Note #3.
221 *
222 * See "On the correctness of Parallel Bisection in Floating
223 * Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
224 *
225 * M (global output) INTEGER
226 * Total number of eigenvalues found. 0 <= M <= N.
227 *
228 * NZ (global output) INTEGER
229 * Total number of eigenvectors computed. 0 <= NZ <= M.
230 * The number of columns of Z that are filled.
231 * If JOBZ .NE. 'V', NZ is not referenced.
232 * If JOBZ .EQ. 'V', NZ = M unless the user supplies
233 * insufficient space and PCHEGVX is not able to detect this
234 * before beginning computation. To get all the eigenvectors
235 * requested, the user must supply both sufficient
236 * space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
237 * and sufficient workspace to compute them. (See LWORK below.)
238 * PCHEGVX is always able to detect insufficient space without
239 * computation unless RANGE .EQ. 'V'.
240 *
241 * W (global output) REAL array, dimension (N)
242 * On normal exit, the first M entries contain the selected
243 * eigenvalues in ascending order.
244 *
245 * ORFAC (global input) REAL
246 * Specifies which eigenvectors should be reorthogonalized.
247 * Eigenvectors that correspond to eigenvalues which are within
248 * tol=ORFAC*norm(A) of each other are to be reorthogonalized.
249 * However, if the workspace is insufficient (see LWORK),
250 * tol may be decreased until all eigenvectors to be
251 * reorthogonalized can be stored in one process.
252 * No reorthogonalization will be done if ORFAC equals zero.
253 * A default value of 10^-3 is used if ORFAC is negative.
254 * ORFAC should be identical on all processes.
255 *
256 * Z (local output) COMPLEX array,
257 * global dimension (N, N),
258 * local dimension ( LLD_Z, LOCc(JZ+N-1) )
259 * If JOBZ = 'V', then on normal exit the first M columns of Z
260 * contain the orthonormal eigenvectors of the matrix
261 * corresponding to the selected eigenvalues. If an eigenvector
262 * fails to converge, then that column of Z contains the latest
263 * approximation to the eigenvector, and the index of the
264 * eigenvector is returned in IFAIL.
265 * If JOBZ = 'N', then Z is not referenced.
266 *
267 * IZ (global input) INTEGER
268 * The row index in the global array Z indicating the first
269 * row of sub( Z ).
270 *
271 * JZ (global input) INTEGER
272 * The column index in the global array Z indicating the
273 * first column of sub( Z ).
274 *
275 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
276 * The array descriptor for the distributed matrix Z.
277 * DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
278 *
279 * WORK (local workspace/output) COMPLEX array,
280 * dimension (LWORK)
281 * WORK(1) returns the optimal workspace.
282 *
283 * LWORK (local input) INTEGER
284 * Size of WORK array. If only eigenvalues are requested:
285 * LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 )
286 * If eigenvectors are requested:
287 * LWORK >= N + ( NP0 + MQ0 + NB ) * NB
288 * with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ).
289 *
290 * For optimal performance, greater workspace is needed, i.e.
291 * LWORK >= MAX( LWORK, N + NHETRD_LWOPT,
292 * NHEGST_LWOPT )
293 * Where LWORK is as defined above, and
294 * NHETRD_LWORK = 2*( ANB+1 )*( 4*NPS+2 ) +
295 * ( NPS + 1 ) * NPS
296 * NHEGST_LWOPT = 2*NP0*NB + NQ0*NB + NB*NB
297 *
298 * NB = DESCA( MB_ )
299 * NP0 = NUMROC( N, NB, 0, 0, NPROW )
300 * NQ0 = NUMROC( N, NB, 0, 0, NPCOL )
301 * ICTXT = DESCA( CTXT_ )
302 * ANB = PJLAENV( ICTXT, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
303 * SQNPC = SQRT( DBLE( NPROW * NPCOL ) )
304 * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
305 *
306 * NUMROC is a ScaLAPACK tool functions;
307 * PJLAENV is a ScaLAPACK envionmental inquiry function
308 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
309 * the subroutine BLACS_GRIDINFO.
310 *
311 * If LWORK = -1, then LWORK is global input and a workspace
312 * query is assumed; the routine only calculates the optimal
313 * size for all work arrays. Each of these values is returned
314 * in the first entry of the correspondingwork array, and no
315 * error message is issued by PXERBLA.
316 *
317 * RWORK (local workspace/output) REAL array,
318 * dimension max(3,LRWORK)
319 * On return, RWORK(1) contains the amount of workspace
320 * required for optimal efficiency
321 * if JOBZ='N' RWORK(1) = optimal amount of workspace
322 * required to compute eigenvalues efficiently
323 * if JOBZ='V' RWORK(1) = optimal amount of workspace
324 * required to compute eigenvalues and eigenvectors
325 * efficiently with no guarantee on orthogonality.
326 * If RANGE='V', it is assumed that all eigenvectors
327 * may be required when computing optimal workspace.
328 *
329 * LRWORK (local input) INTEGER
330 * Size of RWORK
331 * See below for definitions of variables used to define LRWORK.
332 * If no eigenvectors are requested (JOBZ = 'N') then
333 * LRWORK >= 5 * NN + 4 * N
334 * If eigenvectors are requested (JOBZ = 'V' ) then
335 * the amount of workspace required to guarantee that all
336 * eigenvectors are computed is:
337 * LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) +
338 * ICEIL( NEIG, NPROW*NPCOL)*NN
339 *
340 * The computed eigenvectors may not be orthogonal if the
341 * minimal workspace is supplied and ORFAC is too small.
342 * If you want to guarantee orthogonality (at the cost
343 * of potentially poor performance) you should add
344 * the following to LRWORK:
345 * (CLUSTERSIZE-1)*N
346 * where CLUSTERSIZE is the number of eigenvalues in the
347 * largest cluster, where a cluster is defined as a set of
348 * close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
349 * W(J+1) <= W(J) + ORFAC*2*norm(A) }
350 * Variable definitions:
351 * NEIG = number of eigenvectors requested
352 * NB = DESCA( MB_ ) = DESCA( NB_ ) = DESCZ( MB_ ) =
353 * DESCZ( NB_ )
354 * NN = MAX( N, NB, 2 )
355 * DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
356 * DESCZ( CSRC_ ) = 0
357 * NP0 = NUMROC( NN, NB, 0, 0, NPROW )
358 * MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
359 * ICEIL( X, Y ) is a ScaLAPACK function returning
360 * ceiling(X/Y)
361 *
362 * When LRWORK is too small:
363 * If LRWORK is too small to guarantee orthogonality,
364 * PCHEGVX attempts to maintain orthogonality in
365 * the clusters with the smallest
366 * spacing between the eigenvalues.
367 * If LRWORK is too small to compute all the eigenvectors
368 * requested, no computation is performed and INFO=-25
369 * is returned. Note that when RANGE='V', PCHEGVX does
370 * not know how many eigenvectors are requested until
371 * the eigenvalues are computed. Therefore, when RANGE='V'
372 * and as long as LRWORK is large enough to allow PCHEGVX to
373 * compute the eigenvalues, PCHEGVX will compute the
374 * eigenvalues and as many eigenvectors as it can.
375 *
376 * Relationship between workspace, orthogonality & performance:
377 * If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
378 * enough space to compute all the eigenvectors
379 * orthogonally will cause serious degradation in
380 * performance. In the limit (i.e. CLUSTERSIZE = N-1)
381 * PCSTEIN will perform no better than CSTEIN on 1 processor.
382 * For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
383 * all eigenvectors will increase the total execution time
384 * by a factor of 2 or more.
385 * For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
386 * grow as the square of the cluster size, all other factors
387 * remaining equal and assuming enough workspace. Less
388 * workspace means less reorthogonalization but faster
389 * execution.
390 *
391 * If LRWORK = -1, then LRWORK is global input and a workspace
392 * query is assumed; the routine only calculates the minimum
393 * and optimal size for all work arrays. Each of these
394 * values is returned in the first entry of the corresponding
395 * work array, and no error message is issued by PXERBLA.
396 *
397 * IWORK (local workspace) INTEGER array
398 * On return, IWORK(1) contains the amount of integer workspace
399 * required.
400 *
401 * LIWORK (local input) INTEGER
402 * size of IWORK
403 * LIWORK >= 6 * NNP
404 * Where:
405 * NNP = MAX( N, NPROW*NPCOL + 1, 4 )
406 *
407 * If LIWORK = -1, then LIWORK is global input and a workspace
408 * query is assumed; the routine only calculates the minimum
409 * and optimal size for all work arrays. Each of these
410 * values is returned in the first entry of the corresponding
411 * work array, and no error message is issued by PXERBLA.
412 *
413 * IFAIL (output) INTEGER array, dimension (N)
414 * IFAIL provides additional information when INFO .NE. 0
415 * If (MOD(INFO/16,2).NE.0) then IFAIL(1) indicates the order of
416 * the smallest minor which is not positive definite.
417 * If (MOD(INFO,2).NE.0) on exit, then IFAIL contains the
418 * indices of the eigenvectors that failed to converge.
419 *
420 * If neither of the above error conditions hold and JOBZ = 'V',
421 * then the first M elements of IFAIL are set to zero.
422 *
423 * ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
424 * This array contains indices of eigenvectors corresponding to
425 * a cluster of eigenvalues that could not be reorthogonalized
426 * due to insufficient workspace (see LWORK, ORFAC and INFO).
427 * Eigenvectors corresponding to clusters of eigenvalues indexed
428 * ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
429 * reorthogonalized due to lack of workspace. Hence the
430 * eigenvectors corresponding to these clusters may not be
431 * orthogonal. ICLUSTR() is a zero terminated array.
432 * (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
433 * K is the number of clusters
434 * ICLUSTR is not referenced if JOBZ = 'N'
435 *
436 * GAP (global output) REAL array,
437 * dimension (NPROW*NPCOL)
438 * This array contains the gap between eigenvalues whose
439 * eigenvectors could not be reorthogonalized. The output
440 * values in this array correspond to the clusters indicated
441 * by the array ICLUSTR. As a result, the dot product between
442 * eigenvectors correspoding to the I^th cluster may be as high
443 * as ( C * n ) / GAP(I) where C is a small constant.
444 *
445 * INFO (global output) INTEGER
446 * = 0: successful exit
447 * < 0: If the i-th argument is an array and the j-entry had
448 * an illegal value, then INFO = -(i*100+j), if the i-th
449 * argument is a scalar and had an illegal value, then
450 * INFO = -i.
451 * > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
452 * failed to converge. Their indices are stored
453 * in IFAIL. Send e-mail to scalapack@cs.utk.edu
454 * if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
455 * to one or more clusters of eigenvalues could not be
456 * reorthogonalized because of insufficient workspace.
457 * The indices of the clusters are stored in the array
458 * ICLUSTR.
459 * if (MOD(INFO/4,2).NE.0), then space limit prevented
460 * PCHEGVX from computing all of the eigenvectors
461 * between VL and VU. The number of eigenvectors
462 * computed is returned in NZ.
463 * if (MOD(INFO/8,2).NE.0), then PCSTEBZ failed to
464 * compute eigenvalues.
465 * Send e-mail to scalapack@cs.utk.edu
466 * if (MOD(INFO/16,2).NE.0), then B was not positive
467 * definite. IFAIL(1) indicates the order of
468 * the smallest minor which is not positive definite.
469 *
470 * Alignment requirements
471 * ======================
472 *
473 * The distributed submatrices A(IA:*, JA:*), C(IC:IC+M-1,JC:JC+N-1),
474 * and B( IB:IB+N-1, JB:JB+N-1 ) must verify some alignment properties,
475 * namely the following expressions should be true:
476 *
477 * DESCA(MB_) = DESCA(NB_)
478 * IA = IB = IZ
479 * JA = IB = JZ
480 * DESCA(M_) = DESCB(M_) =DESCZ(M_)
481 * DESCA(N_) = DESCB(N_)= DESCZ(N_)
482 * DESCA(MB_) = DESCB(MB_) = DESCZ(MB_)
483 * DESCA(NB_) = DESCB(NB_) = DESCZ(NB_)
484 * DESCA(RSRC_) = DESCB(RSRC_) = DESCZ(RSRC_)
485 * DESCA(CSRC_) = DESCB(CSRC_) = DESCZ(CSRC_)
486 * MOD( IA-1, DESCA( MB_ ) ) = 0
487 * MOD( JA-1, DESCA( NB_ ) ) = 0
488 * MOD( IB-1, DESCB( MB_ ) ) = 0
489 * MOD( JB-1, DESCB( NB_ ) ) = 0
490 *
491 * =====================================================================
492 *
493 * .. Parameters ..
494  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
495  $ MB_, NB_, RSRC_, CSRC_, LLD_
496  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
497  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
498  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
499  COMPLEX ONE
500  parameter( one = 1.0e+0 )
501  REAL FIVE, ZERO
502  PARAMETER ( FIVE = 5.0e+0, zero = 0.0e+0 )
503  INTEGER IERRNPD
504  parameter( ierrnpd = 16 )
505 * ..
506 * .. Local Scalars ..
507  LOGICAL ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
508  CHARACTER TRANS
509  INTEGER ANB, IACOL, IAROW, IBCOL, IBROW, ICOFFA,
510  $ ICOFFB, ICTXT, IROFFA, IROFFB, LIWMIN, LRWMIN,
511  $ lrwopt, lwmin, lwopt, mq0, mycol, myrow, nb,
512  $ neig, nhegst_lwopt, nhetrd_lwopt, nn, np0,
513  $ npcol, nprow, nps, nq0, sqnpc
514  REAL EPS, SCALE
515 * ..
516 * .. Local Arrays ..
517  INTEGER IDUM1( 5 ), IDUM2( 5 )
518 * ..
519 * .. External Functions ..
520  LOGICAL LSAME
521  INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
522  REAL PSLAMCH
523  EXTERNAL LSAME, ICEIL, INDXG2P, NUMROC, PJLAENV, PSLAMCH
524 * ..
525 * .. External Subroutines ..
526  EXTERNAL blacs_gridinfo, chk1mat, pcheevx, pchengst,
527  $ pchk1mat, pchk2mat, pcpotrf, pctrmm, pctrsm,
528  $ pxerbla, sgebr2d, sgebs2d, sscal
529 * ..
530 * .. Intrinsic Functions ..
531  INTRINSIC abs, cmplx, dble, ichar, int, max, min, mod,
532  $ real, sqrt
533 * ..
534 * .. Executable Statements ..
535 * This is just to keep ftnchek and toolpack/1 happy
536  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
537  $ rsrc_.LT.0 )RETURN
538 *
539 * Get grid parameters
540 *
541  ictxt = desca( ctxt_ )
542  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
543 *
544 * Test the input parameters
545 *
546  info = 0
547  IF( nprow.EQ.-1 ) THEN
548  info = -( 900+ctxt_ )
549  ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
550  info = -( 1300+ctxt_ )
551  ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
552  info = -( 2600+ctxt_ )
553  ELSE
554 *
555 * Get machine constants.
556 *
557  eps = pslamch( desca( ctxt_ ), 'Precision' )
558 *
559  wantz = lsame( jobz, 'V' )
560  upper = lsame( uplo, 'U' )
561  alleig = lsame( range, 'A' )
562  valeig = lsame( range, 'V' )
563  indeig = lsame( range, 'I' )
564  CALL chk1mat( n, 4, n, 4, ia, ja, desca, 9, info )
565  CALL chk1mat( n, 4, n, 4, ib, jb, descb, 13, info )
566  CALL chk1mat( n, 4, n, 4, iz, jz, descz, 26, info )
567  IF( info.EQ.0 ) THEN
568  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
569  rwork( 1 ) = abstol
570  IF( valeig ) THEN
571  rwork( 2 ) = vl
572  rwork( 3 ) = vu
573  ELSE
574  rwork( 2 ) = zero
575  rwork( 3 ) = zero
576  END IF
577  CALL sgebs2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, rwork,
578  $ 3 )
579  ELSE
580  CALL sgebr2d( desca( ctxt_ ), 'ALL', ' ', 3, 1, rwork, 3,
581  $ 0, 0 )
582  END IF
583  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
584  $ nprow )
585  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
586  $ nprow )
587  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
588  $ npcol )
589  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
590  $ npcol )
591  iroffa = mod( ia-1, desca( mb_ ) )
592  icoffa = mod( ja-1, desca( nb_ ) )
593  iroffb = mod( ib-1, descb( mb_ ) )
594  icoffb = mod( jb-1, descb( nb_ ) )
595 *
596 * Compute the total amount of space needed
597 *
598  lquery = .false.
599  IF( lwork.EQ.-1 .OR. liwork.EQ.-1 .OR. lrwork.EQ.-1 )
600  $ lquery = .true.
601 *
602  liwmin = 6*max( n, ( nprow*npcol )+1, 4 )
603 *
604  nb = desca( mb_ )
605  nn = max( n, nb, 2 )
606  np0 = numroc( nn, nb, 0, 0, nprow )
607 *
608  IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
609  $ THEN
610  lwmin = n + max( nb*( np0+1 ), 3 )
611  lwopt = lwmin
612  lrwmin = 5*nn + 4*n
613  IF( wantz ) THEN
614  mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
615  lrwopt = 4*n + max( 5*nn, np0*mq0 )
616  ELSE
617  lrwopt = lrwmin
618  END IF
619  neig = 0
620  ELSE
621  IF( alleig .OR. valeig ) THEN
622  neig = n
623  ELSE IF( indeig ) THEN
624  neig = iu - il + 1
625  END IF
626  mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
627  lwmin = n + ( np0+mq0+nb )*nb
628  lwopt = lwmin
629  lrwmin = 4*n + max( 5*nn, np0*mq0 ) +
630  $ iceil( neig, nprow*npcol )*nn
631  lrwopt = lrwmin
632 *
633  END IF
634 *
635 * Compute how much workspace is needed to use the
636 * new TRD and GST algorithms
637 *
638  anb = pjlaenv( ictxt, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
639  sqnpc = int( sqrt( dble( nprow*npcol ) ) )
640  nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
641  nhetrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
642  nb = desca( mb_ )
643  np0 = numroc( n, nb, 0, 0, nprow )
644  nq0 = numroc( n, nb, 0, 0, npcol )
645  nhegst_lwopt = 2*np0*nb + nq0*nb + nb*nb
646  lwopt = max( lwopt, n+nhetrd_lwopt, nhegst_lwopt )
647 *
648 * Version 1.0 Limitations
649 *
650  IF( ibtype.LT.1 .OR. ibtype.GT.3 ) THEN
651  info = -1
652  ELSE IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
653  info = -2
654  ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
655  info = -3
656  ELSE IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
657  info = -4
658  ELSE IF( n.LT.0 ) THEN
659  info = -5
660  ELSE IF( iroffa.NE.0 ) THEN
661  info = -7
662  ELSE IF( icoffa.NE.0 ) THEN
663  info = -8
664  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
665  info = -( 900+nb_ )
666  ELSE IF( desca( m_ ).NE.descb( m_ ) ) THEN
667  info = -( 1300+m_ )
668  ELSE IF( desca( n_ ).NE.descb( n_ ) ) THEN
669  info = -( 1300+n_ )
670  ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
671  info = -( 1300+mb_ )
672  ELSE IF( desca( nb_ ).NE.descb( nb_ ) ) THEN
673  info = -( 1300+nb_ )
674  ELSE IF( desca( rsrc_ ).NE.descb( rsrc_ ) ) THEN
675  info = -( 1300+rsrc_ )
676  ELSE IF( desca( csrc_ ).NE.descb( csrc_ ) ) THEN
677  info = -( 1300+csrc_ )
678  ELSE IF( desca( ctxt_ ).NE.descb( ctxt_ ) ) THEN
679  info = -( 1300+ctxt_ )
680  ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
681  info = -( 2200+m_ )
682  ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
683  info = -( 2200+n_ )
684  ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
685  info = -( 2200+mb_ )
686  ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
687  info = -( 2200+nb_ )
688  ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
689  info = -( 2200+rsrc_ )
690  ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
691  info = -( 2200+csrc_ )
692  ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
693  info = -( 2200+ctxt_ )
694  ELSE IF( iroffb.NE.0 .OR. ibrow.NE.iarow ) THEN
695  info = -11
696  ELSE IF( icoffb.NE.0 .OR. ibcol.NE.iacol ) THEN
697  info = -12
698  ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
699  info = -15
700  ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
701  $ THEN
702  info = -16
703  ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
704  $ THEN
705  info = -17
706  ELSE IF( valeig .AND. ( abs( rwork( 2 )-vl ).GT.five*eps*
707  $ abs( vl ) ) ) THEN
708  info = -14
709  ELSE IF( valeig .AND. ( abs( rwork( 3 )-vu ).GT.five*eps*
710  $ abs( vu ) ) ) THEN
711  info = -15
712  ELSE IF( abs( rwork( 1 )-abstol ).GT.five*eps*
713  $ abs( abstol ) ) THEN
714  info = -18
715  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
716  info = -28
717  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
718  info = -30
719  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
720  info = -32
721  END IF
722  END IF
723  idum1( 1 ) = ibtype
724  idum2( 1 ) = 1
725  IF( wantz ) THEN
726  idum1( 2 ) = ichar( 'V' )
727  ELSE
728  idum1( 2 ) = ichar( 'N' )
729  END IF
730  idum2( 2 ) = 2
731  IF( upper ) THEN
732  idum1( 3 ) = ichar( 'U' )
733  ELSE
734  idum1( 3 ) = ichar( 'L' )
735  END IF
736  idum2( 3 ) = 3
737  IF( alleig ) THEN
738  idum1( 4 ) = ichar( 'A' )
739  ELSE IF( indeig ) THEN
740  idum1( 4 ) = ichar( 'I' )
741  ELSE
742  idum1( 4 ) = ichar( 'V' )
743  END IF
744  idum2( 4 ) = 4
745  IF( lquery ) THEN
746  idum1( 5 ) = -1
747  ELSE
748  idum1( 5 ) = 1
749  END IF
750  idum2( 5 ) = 5
751  CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 9, n, 4, n, 4, ib,
752  $ jb, descb, 13, 5, idum1, idum2, info )
753  CALL pchk1mat( n, 4, n, 4, iz, jz, descz, 26, 0, idum1, idum2,
754  $ info )
755  END IF
756 *
757  iwork( 1 ) = liwmin
758  work( 1 ) = cmplx( real( lwopt ) )
759  rwork( 1 ) = real( lrwopt )
760 *
761  IF( info.NE.0 ) THEN
762  CALL pxerbla( ictxt, 'PCHEGVX ', -info )
763  RETURN
764  ELSE IF( lquery ) THEN
765  RETURN
766  END IF
767 *
768 * Form a Cholesky factorization of sub( B ).
769 *
770  CALL pcpotrf( uplo, n, b, ib, jb, descb, info )
771  IF( info.NE.0 ) THEN
772  iwork( 1 ) = liwmin
773  work( 1 ) = cmplx( real( lwopt ) )
774  rwork( 1 ) = real( lrwopt )
775  ifail( 1 ) = info
776  info = ierrnpd
777  RETURN
778  END IF
779 *
780 * Transform problem to standard eigenvalue problem and solve.
781 *
782  CALL pchengst( ibtype, uplo, n, a, ia, ja, desca, b, ib, jb,
783  $ descb, scale, work, lwork, info )
784  CALL pcheevx( jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il,
785  $ iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work,
786  $ lwork, rwork, lrwork, iwork, liwork, ifail, iclustr,
787  $ gap, info )
788 *
789  IF( wantz ) THEN
790 *
791 * Backtransform eigenvectors to the original problem.
792 *
793  neig = m
794  IF( ibtype.EQ.1 .OR. ibtype.EQ.2 ) THEN
795 *
796 * For sub( A )*x=(lambda)*sub( B )*x and
797 * sub( A )*sub( B )*x=(lambda)*x; backtransform eigenvectors:
798 * x = inv(L)'*y or inv(U)*y
799 *
800  IF( upper ) THEN
801  trans = 'N'
802  ELSE
803  trans = 'C'
804  END IF
805 *
806  CALL pctrsm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
807  $ b, ib, jb, descb, z, iz, jz, descz )
808 *
809  ELSE IF( ibtype.EQ.3 ) THEN
810 *
811 * For sub( B )*sub( A )*x=(lambda)*x;
812 * backtransform eigenvectors: x = L*y or U'*y
813 *
814  IF( upper ) THEN
815  trans = 'C'
816  ELSE
817  trans = 'N'
818  END IF
819 *
820  CALL pctrmm( 'Left', uplo, trans, 'Non-unit', n, neig, one,
821  $ b, ib, jb, descb, z, iz, jz, descz )
822  END IF
823  END IF
824 *
825  IF( scale.NE.one ) THEN
826  CALL sscal( n, scale, w, 1 )
827  END IF
828 *
829  iwork( 1 ) = liwmin
830  work( 1 ) = cmplx( real( lwopt ) )
831  rwork( 1 ) = real( lrwopt )
832  RETURN
833 *
834 * End of PCHEGVX
835 *
836  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pcpotrf
subroutine pcpotrf(UPLO, N, A, IA, JA, DESCA, INFO)
Definition: pcpotrf.f:2
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pchegvx
subroutine pchegvx(IBTYPE, JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pchegvx.f:6
pchengst
subroutine pchengst(IBTYPE, UPLO, N, A, IA, JA, DESCA, B, IB, JB, DESCB, SCALE, WORK, LWORK, INFO)
Definition: pchengst.f:3
pcheevx
subroutine pcheevx(JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL, VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ, JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, IFAIL, ICLUSTR, GAP, INFO)
Definition: pcheevx.f:5
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181