SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzheevx.f
Go to the documentation of this file.
1 SUBROUTINE pzheevx( JOBZ, RANGE, UPLO, N, A, IA, JA, DESCA, VL,
2 $ VU, IL, IU, ABSTOL, M, NZ, W, ORFAC, Z, IZ,
3 $ JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK,
4 $ LIWORK, IFAIL, ICLUSTR, GAP, INFO )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* May 25, 2001
10*
11* .. Scalar Arguments ..
12 CHARACTER JOBZ, RANGE, UPLO
13 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LRWORK,
14 $ LWORK, M, N, NZ
15 DOUBLE PRECISION ABSTOL, ORFAC, VL, VU
16* ..
17* .. Array Arguments ..
18 INTEGER DESCA( * ), DESCZ( * ), ICLUSTR( * ),
19 $ IFAIL( * ), IWORK( * )
20 DOUBLE PRECISION GAP( * ), RWORK( * ), W( * )
21 COMPLEX*16 A( * ), WORK( * ), Z( * )
22* ..
23*
24* Purpose
25* =======
26*
27* PZHEEVX computes selected eigenvalues and, optionally, eigenvectors
28* of a complex hermitian matrix A by calling the recommended sequence
29* of ScaLAPACK routines. Eigenvalues/vectors can be selected by
30* specifying a range of values or a range of indices for the desired
31* eigenvalues.
32*
33* Notes
34* =====
35*
36* Each global data object is described by an associated description
37* vector. This vector stores the information required to establish
38* the mapping between an object element and its corresponding process
39* and memory location.
40*
41* Let A be a generic term for any 2D block cyclicly distributed array.
42* Such a global array has an associated description vector DESCA.
43* In the following comments, the character _ should be read as
44* "of the global array".
45*
46* NOTATION STORED IN EXPLANATION
47* --------------- -------------- --------------------------------------
48* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49* DTYPE_A = 1.
50* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51* the BLACS process grid A is distribu-
52* ted over. The context itself is glo-
53* bal, but the handle (the integer
54* value) may vary.
55* M_A (global) DESCA( M_ ) The number of rows in the global
56* array A.
57* N_A (global) DESCA( N_ ) The number of columns in the global
58* array A.
59* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60* the rows of the array.
61* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62* the columns of the array.
63* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64* row of the array A is distributed.
65* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66* first column of the array A is
67* distributed.
68* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69* array. LLD_A >= MAX(1,LOCr(M_A)).
70*
71* Let K be the number of rows or columns of a distributed matrix,
72* and assume that its process grid has dimension p x q.
73* LOCr( K ) denotes the number of elements of K that a process
74* would receive if K were distributed over the p processes of its
75* process column.
76* Similarly, LOCc( K ) denotes the number of elements of K that a
77* process would receive if K were distributed over the q processes of
78* its process row.
79* The values of LOCr() and LOCc() may be determined via a call to the
80* ScaLAPACK tool function, NUMROC:
81* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83* An upper bound for these quantities may be computed by:
84* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86*
87* PZHEEVX assumes IEEE 754 standard compliant arithmetic. To port
88* to a system which does not have IEEE 754 arithmetic, modify
89* the appropriate SLmake.inc file to include the compiler switch
90* -DNO_IEEE. This switch only affects the compilation of pdlaiect.c.
91*
92* Arguments
93* =========
94*
95* NP = the number of rows local to a given process.
96* NQ = the number of columns local to a given process.
97*
98* JOBZ (global input) CHARACTER*1
99* Specifies whether or not to compute the eigenvectors:
100* = 'N': Compute eigenvalues only.
101* = 'V': Compute eigenvalues and eigenvectors.
102*
103* RANGE (global input) CHARACTER*1
104* = 'A': all eigenvalues will be found.
105* = 'V': all eigenvalues in the interval [VL,VU] will be found.
106* = 'I': the IL-th through IU-th eigenvalues will be found.
107*
108* UPLO (global input) CHARACTER*1
109* Specifies whether the upper or lower triangular part of the
110* Hermitian matrix A is stored:
111* = 'U': Upper triangular
112* = 'L': Lower triangular
113*
114* N (global input) INTEGER
115* The number of rows and columns of the matrix A. N >= 0.
116*
117* A (local input/workspace) block cyclic COMPLEX*16 array,
118* global dimension (N, N),
119* local dimension ( LLD_A, LOCc(JA+N-1) )
120*
121* On entry, the Hermitian matrix A. If UPLO = 'U', only the
122* upper triangular part of A is used to define the elements of
123* the Hermitian matrix. If UPLO = 'L', only the lower
124* triangular part of A is used to define the elements of the
125* Hermitian matrix.
126*
127* On exit, the lower triangle (if UPLO='L') or the upper
128* triangle (if UPLO='U') of A, including the diagonal, is
129* destroyed.
130*
131* IA (global input) INTEGER
132* A's global row index, which points to the beginning of the
133* submatrix which is to be operated on.
134*
135* JA (global input) INTEGER
136* A's global column index, which points to the beginning of
137* the submatrix which is to be operated on.
138*
139* DESCA (global and local input) INTEGER array of dimension DLEN_.
140* The array descriptor for the distributed matrix A.
141* If DESCA( CTXT_ ) is incorrect, PZHEEVX cannot guarantee
142* correct error reporting.
143*
144* VL (global input) DOUBLE PRECISION
145* If RANGE='V', the lower bound of the interval to be searched
146* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
147*
148* VU (global input) DOUBLE PRECISION
149* If RANGE='V', the upper bound of the interval to be searched
150* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
151*
152* IL (global input) INTEGER
153* If RANGE='I', the index (from smallest to largest) of the
154* smallest eigenvalue to be returned. IL >= 1.
155* Not referenced if RANGE = 'A' or 'V'.
156*
157* IU (global input) INTEGER
158* If RANGE='I', the index (from smallest to largest) of the
159* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
160* Not referenced if RANGE = 'A' or 'V'.
161*
162* ABSTOL (global input) DOUBLE PRECISION
163* If JOBZ='V', setting ABSTOL to PDLAMCH( CONTEXT, 'U') yields
164* the most orthogonal eigenvectors.
165*
166* The absolute error tolerance for the eigenvalues.
167* An approximate eigenvalue is accepted as converged
168* when it is determined to lie in an interval [a,b]
169* of width less than or equal to
170*
171* ABSTOL + EPS * max( |a|,|b| ) ,
172*
173* where EPS is the machine precision. If ABSTOL is less than
174* or equal to zero, then EPS*norm(T) will be used in its place,
175* where norm(T) is the 1-norm of the tridiagonal matrix
176* obtained by reducing A to tridiagonal form.
177*
178* Eigenvalues will be computed most accurately when ABSTOL is
179* set to twice the underflow threshold 2*PDLAMCH('S') not zero.
180* If this routine returns with ((MOD(INFO,2).NE.0) .OR.
181* (MOD(INFO/8,2).NE.0)), indicating that some eigenvalues or
182* eigenvectors did not converge, try setting ABSTOL to
183* 2*PDLAMCH('S').
184*
185* See "Computing Small Singular Values of Bidiagonal Matrices
186* with Guaranteed High Relative Accuracy," by Demmel and
187* Kahan, LAPACK Working Note #3.
188*
189* See "On the correctness of Parallel Bisection in Floating
190* Point" by Demmel, Dhillon and Ren, LAPACK Working Note #70
191*
192* M (global output) INTEGER
193* Total number of eigenvalues found. 0 <= M <= N.
194*
195* NZ (global output) INTEGER
196* Total number of eigenvectors computed. 0 <= NZ <= M.
197* The number of columns of Z that are filled.
198* If JOBZ .NE. 'V', NZ is not referenced.
199* If JOBZ .EQ. 'V', NZ = M unless the user supplies
200* insufficient space and PZHEEVX is not able to detect this
201* before beginning computation. To get all the eigenvectors
202* requested, the user must supply both sufficient
203* space to hold the eigenvectors in Z (M .LE. DESCZ(N_))
204* and sufficient workspace to compute them. (See LWORK below.)
205* PZHEEVX is always able to detect insufficient space without
206* computation unless RANGE .EQ. 'V'.
207*
208* W (global output) DOUBLE PRECISION array, dimension (N)
209* On normal exit, the first M entries contain the selected
210* eigenvalues in ascending order.
211*
212* ORFAC (global input) DOUBLE PRECISION
213* Specifies which eigenvectors should be reorthogonalized.
214* Eigenvectors that correspond to eigenvalues which are within
215* tol=ORFAC*norm(A) of each other are to be reorthogonalized.
216* However, if the workspace is insufficient (see LWORK),
217* tol may be decreased until all eigenvectors to be
218* reorthogonalized can be stored in one process.
219* No reorthogonalization will be done if ORFAC equals zero.
220* A default value of 10^-3 is used if ORFAC is negative.
221* ORFAC should be identical on all processes.
222*
223* Z (local output) COMPLEX*16 array,
224* global dimension (N, N),
225* local dimension ( LLD_Z, LOCc(JZ+N-1) )
226* If JOBZ = 'V', then on normal exit the first M columns of Z
227* contain the orthonormal eigenvectors of the matrix
228* corresponding to the selected eigenvalues. If an eigenvector
229* fails to converge, then that column of Z contains the latest
230* approximation to the eigenvector, and the index of the
231* eigenvector is returned in IFAIL.
232* If JOBZ = 'N', then Z is not referenced.
233*
234* IZ (global input) INTEGER
235* Z's global row index, which points to the beginning of the
236* submatrix which is to be operated on.
237*
238* JZ (global input) INTEGER
239* Z's global column index, which points to the beginning of
240* the submatrix which is to be operated on.
241*
242* DESCZ (global and local input) INTEGER array of dimension DLEN_.
243* The array descriptor for the distributed matrix Z.
244* DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
245*
246* WORK (local workspace/output) COMPLEX*16 array,
247* dimension (LWORK)
248* WORK(1) returns workspace adequate workspace to allow
249* optimal performance.
250*
251* LWORK (local input) INTEGER
252* Size of WORK array. If only eigenvalues are requested:
253* LWORK >= N + MAX( NB * ( NP0 + 1 ), 3 )
254* If eigenvectors are requested:
255* LWORK >= N + ( NP0 + MQ0 + NB ) * NB
256* with NQ0 = NUMROC( NN, NB, 0, 0, NPCOL ).
257*
258* For optimal performance, greater workspace is needed, i.e.
259* LWORK >= MAX( LWORK, NHETRD_LWORK )
260* Where LWORK is as defined above, and
261* NHETRD_LWORK = N + 2*( ANB+1 )*( 4*NPS+2 ) +
262* ( NPS + 1 ) * NPS
263*
264* ICTXT = DESCA( CTXT_ )
265* ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
266* SQNPC = SQRT( DBLE( NPROW * NPCOL ) )
267* NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
268*
269* NUMROC is a ScaLAPACK tool functions;
270* PJLAENV is a ScaLAPACK envionmental inquiry function
271* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
272* the subroutine BLACS_GRIDINFO.
273*
274* If LWORK = -1, then LWORK is global input and a workspace
275* query is assumed; the routine only calculates the
276* optimal size for all work arrays. Each of these
277* values is returned in the first entry of the corresponding
278* work array, and no error message is issued by PXERBLA.
279*
280* RWORK (local workspace/output) DOUBLE PRECISION array,
281* dimension max(3,LRWORK)
282* On return, WORK(1) contains the optimal amount of
283* workspace required for efficient execution.
284* if JOBZ='N' RWORK(1) = optimal amount of workspace
285* required to compute eigenvalues efficiently
286* if JOBZ='V' RWORK(1) = optimal amount of workspace
287* required to compute eigenvalues and eigenvectors
288* efficiently with no guarantee on orthogonality.
289* If RANGE='V', it is assumed that all eigenvectors
290* may be required.
291*
292* LRWORK (local input) INTEGER
293* Size of RWORK
294* See below for definitions of variables used to define LRWORK.
295* If no eigenvectors are requested (JOBZ = 'N') then
296* LRWORK >= 5 * NN + 4 * N
297* If eigenvectors are requested (JOBZ = 'V' ) then
298* the amount of workspace required to guarantee that all
299* eigenvectors are computed is:
300* LRWORK >= 4*N + MAX( 5*NN, NP0 * MQ0 ) +
301* ICEIL( NEIG, NPROW*NPCOL)*NN
302*
303* The computed eigenvectors may not be orthogonal if the
304* minimal workspace is supplied and ORFAC is too small.
305* If you want to guarantee orthogonality (at the cost
306* of potentially poor performance) you should add
307* the following to LRWORK:
308* (CLUSTERSIZE-1)*N
309* where CLUSTERSIZE is the number of eigenvalues in the
310* largest cluster, where a cluster is defined as a set of
311* close eigenvalues: { W(K),...,W(K+CLUSTERSIZE-1) |
312* W(J+1) <= W(J) + ORFAC*2*norm(A) }
313* Variable definitions:
314* NEIG = number of eigenvectors requested
315* NB = DESCA( MB_ ) = DESCA( NB_ ) =
316* DESCZ( MB_ ) = DESCZ( NB_ )
317* NN = MAX( N, NB, 2 )
318* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
319* DESCZ( CSRC_ ) = 0
320* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
321* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
322* ICEIL( X, Y ) is a ScaLAPACK function returning
323* ceiling(X/Y)
324*
325* When LRWORK is too small:
326* If LRWORK is too small to guarantee orthogonality,
327* PZHEEVX attempts to maintain orthogonality in
328* the clusters with the smallest
329* spacing between the eigenvalues.
330* If LRWORK is too small to compute all the eigenvectors
331* requested, no computation is performed and INFO=-25
332* is returned. Note that when RANGE='V', PZHEEVX does
333* not know how many eigenvectors are requested until
334* the eigenvalues are computed. Therefore, when RANGE='V'
335* and as long as LRWORK is large enough to allow PZHEEVX to
336* compute the eigenvalues, PZHEEVX will compute the
337* eigenvalues and as many eigenvectors as it can.
338*
339* Relationship between workspace, orthogonality & performance:
340* If CLUSTERSIZE >= N/SQRT(NPROW*NPCOL), then providing
341* enough space to compute all the eigenvectors
342* orthogonally will cause serious degradation in
343* performance. In the limit (i.e. CLUSTERSIZE = N-1)
344* PZSTEIN will perform no better than ZSTEIN on 1
345* processor.
346* For CLUSTERSIZE = N/SQRT(NPROW*NPCOL) reorthogonalizing
347* all eigenvectors will increase the total execution time
348* by a factor of 2 or more.
349* For CLUSTERSIZE > N/SQRT(NPROW*NPCOL) execution time will
350* grow as the square of the cluster size, all other factors
351* remaining equal and assuming enough workspace. Less
352* workspace means less reorthogonalization but faster
353* execution.
354*
355* If LRWORK = -1, then LRWORK is global input and a workspace
356* query is assumed; the routine only calculates the size
357* required for optimal performance for all work arrays. Each of
358* these values is returned in the first entry of the
359* corresponding work arrays, and no error message is issued by
360* PXERBLA.
361*
362* IWORK (local workspace) INTEGER array
363* On return, IWORK(1) contains the amount of integer workspace
364* required.
365*
366* LIWORK (local input) INTEGER
367* size of IWORK
368* LIWORK >= 6 * NNP
369* Where:
370* NNP = MAX( N, NPROW*NPCOL + 1, 4 )
371* If LIWORK = -1, then LIWORK is global input and a workspace
372* query is assumed; the routine only calculates the minimum
373* and optimal size for all work arrays. Each of these
374* values is returned in the first entry of the corresponding
375* work array, and no error message is issued by PXERBLA.
376*
377* IFAIL (global output) INTEGER array, dimension (N)
378* If JOBZ = 'V', then on normal exit, the first M elements of
379* IFAIL are zero. If (MOD(INFO,2).NE.0) on exit, then
380* IFAIL contains the
381* indices of the eigenvectors that failed to converge.
382* If JOBZ = 'N', then IFAIL is not referenced.
383*
384* ICLUSTR (global output) integer array, dimension (2*NPROW*NPCOL)
385* This array contains indices of eigenvectors corresponding to
386* a cluster of eigenvalues that could not be reorthogonalized
387* due to insufficient workspace (see LWORK, ORFAC and INFO).
388* Eigenvectors corresponding to clusters of eigenvalues indexed
389* ICLUSTR(2*I-1) to ICLUSTR(2*I), could not be
390* reorthogonalized due to lack of workspace. Hence the
391* eigenvectors corresponding to these clusters may not be
392* orthogonal. ICLUSTR() is a zero terminated array.
393* (ICLUSTR(2*K).NE.0 .AND. ICLUSTR(2*K+1).EQ.0) if and only if
394* K is the number of clusters
395* ICLUSTR is not referenced if JOBZ = 'N'
396*
397* GAP (global output) DOUBLE PRECISION array,
398* dimension (NPROW*NPCOL)
399* This array contains the gap between eigenvalues whose
400* eigenvectors could not be reorthogonalized. The output
401* values in this array correspond to the clusters indicated
402* by the array ICLUSTR. As a result, the dot product between
403* eigenvectors correspoding to the I^th cluster may be as high
404* as ( C * n ) / GAP(I) where C is a small constant.
405*
406* INFO (global output) INTEGER
407* = 0: successful exit
408* < 0: If the i-th argument is an array and the j-entry had
409* an illegal value, then INFO = -(i*100+j), if the i-th
410* argument is a scalar and had an illegal value, then
411* INFO = -i.
412* > 0: if (MOD(INFO,2).NE.0), then one or more eigenvectors
413* failed to converge. Their indices are stored
414* in IFAIL. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
415* Send e-mail to scalapack@cs.utk.edu
416* if (MOD(INFO/2,2).NE.0),then eigenvectors corresponding
417* to one or more clusters of eigenvalues could not be
418* reorthogonalized because of insufficient workspace.
419* The indices of the clusters are stored in the array
420* ICLUSTR.
421* if (MOD(INFO/4,2).NE.0), then space limit prevented
422* PZHEEVX from computing all of the eigenvectors
423* between VL and VU. The number of eigenvectors
424* computed is returned in NZ.
425* if (MOD(INFO/8,2).NE.0), then PZSTEBZ failed to compute
426* eigenvalues. Ensure ABSTOL=2.0*PDLAMCH( 'U' )
427* Send e-mail to scalapack@cs.utk.edu
428*
429* Alignment requirements
430* ======================
431*
432* The distributed submatrices A(IA:*, JA:*) and C(IC:IC+M-1,JC:JC+N-1)
433* must verify some alignment properties, namely the following
434* expressions should be true:
435*
436* ( MB_A.EQ.NB_A.EQ.MB_Z .AND. IROFFA.EQ.IROFFZ .AND. IROFFA.EQ.0 .AND.
437* IAROW.EQ.IZROW )
438* where
439* IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
440*
441* =====================================================================
442*
443* Differences between PZHEEVX and ZHEEVX
444* ======================================
445*
446* A, LDA -> A, IA, JA, DESCA
447* Z, LDZ -> Z, IZ, JZ, DESCZ
448* WORKSPACE needs are larger for PZHEEVX.
449* LIWORK parameter added
450*
451* ORFAC, ICLUSTER() and GAP() parameters added
452* meaning of INFO is changed
453*
454* Functional differences:
455* PZHEEVX does not promise orthogonality for eigenvectors associated
456* with tighly clustered eigenvalues.
457* PZHEEVX does not reorthogonalize eigenvectors
458* that are on different processes. The extent of reorthogonalization
459* is controlled by the input parameter LWORK.
460*
461* Version 1.4 limitations:
462* DESCA(MB_) = DESCA(NB_)
463* DESCA(M_) = DESCZ(M_)
464* DESCA(N_) = DESCZ(N_)
465* DESCA(MB_) = DESCZ(MB_)
466* DESCA(NB_) = DESCZ(NB_)
467* DESCA(RSRC_) = DESCZ(RSRC_)
468*
469* .. Parameters ..
470 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
471 $ MB_, NB_, RSRC_, CSRC_, LLD_
472 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
473 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
474 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
475 DOUBLE PRECISION ZERO, ONE, TEN, FIVE
476 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0, ten = 10.0d+0,
477 $ five = 5.0d+0 )
478 INTEGER IERREIN, IERRCLS, IERRSPC, IERREBZ
479 parameter( ierrein = 1, ierrcls = 2, ierrspc = 4,
480 $ ierrebz = 8 )
481* ..
482* .. Local Scalars ..
483 LOGICAL ALLEIG, INDEIG, LOWER, LQUERY, QUICKRETURN,
484 $ VALEIG, WANTZ
485 CHARACTER ORDER
486 INTEGER ANB, CSRC_A, I, IAROW, ICOFFA, ICTXT, IINFO,
487 $ INDD, INDD2, INDE, INDE2, INDIBL, INDISP,
488 $ indrwork, indtau, indwork, iroffa, iroffz,
489 $ iscale, isizestebz, isizestein, izrow,
490 $ lallwork, liwmin, llrwork, llwork, lrwmin,
491 $ lrwopt, lwmin, lwopt, maxeigs, mb_a, mq0,
492 $ mycol, myrow, nb, nb_a, neig, nhetrd_lwopt, nn,
493 $ nnp, np0, npcol, nprocs, nprow, nps, nq0,
494 $ nsplit, nzz, offset, rsrc_a, rsrc_z, sizeheevx,
495 $ sizestein, sqnpc
496 DOUBLE PRECISION ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
497 $ SIGMA, SMLNUM, VLL, VUU
498* ..
499* .. Local Arrays ..
500 INTEGER IDUM1( 4 ), IDUM2( 4 )
501* ..
502* .. External Functions ..
503 LOGICAL LSAME
504 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
505 DOUBLE PRECISION PDLAMCH, PZLANHE
506 EXTERNAL lsame, iceil, indxg2p, numroc, pjlaenv,
507 $ pdlamch, pzlanhe
508* ..
509* .. External Subroutines ..
510 EXTERNAL blacs_gridinfo, chk1mat, dgebr2d, dgebs2d,
511 $ dlasrt, dscal, igamn2d, pchk1mat, pchk2mat,
514* ..
515* .. Intrinsic Functions ..
516 INTRINSIC abs, dble, dcmplx, ichar, int, max, min, mod,
517 $ sqrt
518* ..
519* .. Executable Statements ..
520* This is just to keep ftnchek and toolpack/1 happy
521 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
522 $ rsrc_.LT.0 )RETURN
523*
524 quickreturn = ( n.EQ.0 )
525*
526* Test the input arguments.
527*
528 ictxt = desca( ctxt_ )
529 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
530 info = 0
531*
532 wantz = lsame( jobz, 'V' )
533 IF( nprow.EQ.-1 ) THEN
534 info = -( 800+ctxt_ )
535 ELSE IF( wantz ) THEN
536 IF( ictxt.NE.descz( ctxt_ ) ) THEN
537 info = -( 2100+ctxt_ )
538 END IF
539 END IF
540 IF( info.EQ.0 ) THEN
541 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
542 IF( wantz )
543 $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
544*
545 IF( info.EQ.0 ) THEN
546*
547* Get machine constants.
548*
549 safmin = pdlamch( ictxt, 'Safe minimum' )
550 eps = pdlamch( ictxt, 'Precision' )
551 smlnum = safmin / eps
552 bignum = one / smlnum
553 rmin = sqrt( smlnum )
554 rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
555*
556 nprocs = nprow*npcol
557 lower = lsame( uplo, 'L' )
558 alleig = lsame( range, 'A' )
559 valeig = lsame( range, 'V' )
560 indeig = lsame( range, 'I' )
561*
562* Set up pointers into the WORK array
563*
564 indtau = 1
565 indwork = indtau + n
566 llwork = lwork - indwork + 1
567*
568* Set up pointers into the RWORK array
569*
570 inde = 1
571 indd = inde + n
572 indd2 = indd + n
573 inde2 = indd2 + n
574 indrwork = inde2 + n
575 llrwork = lrwork - indrwork + 1
576*
577* Set up pointers into the IWORK array
578*
579 isizestein = 3*n + nprocs + 1
580 isizestebz = max( 4*n, 14, nprocs )
581 indibl = ( max( isizestein, isizestebz ) ) + 1
582 indisp = indibl + n
583*
584* Compute the total amount of space needed
585*
586 lquery = .false.
587 IF( lwork.EQ.-1 .OR. liwork.EQ.-1 .OR. lrwork.EQ.-1 )
588 $ lquery = .true.
589*
590 nnp = max( n, nprocs+1, 4 )
591 liwmin = 6*nnp
592*
593 nprocs = nprow*npcol
594 nb_a = desca( nb_ )
595 mb_a = desca( mb_ )
596 nb = nb_a
597 nn = max( n, nb, 2 )
598*
599 rsrc_a = desca( rsrc_ )
600 csrc_a = desca( csrc_ )
601 iroffa = mod( ia-1, mb_a )
602 icoffa = mod( ja-1, nb_a )
603 iarow = indxg2p( 1, nb_a, myrow, rsrc_a, nprow )
604 np0 = numroc( n+iroffa, nb, 0, 0, nprow )
605 mq0 = numroc( n+icoffa, nb, 0, 0, npcol )
606 IF( wantz ) THEN
607 rsrc_z = descz( rsrc_ )
608 iroffz = mod( iz-1, mb_a )
609 izrow = indxg2p( 1, nb_a, myrow, rsrc_z, nprow )
610 ELSE
611 iroffz = 0
612 izrow = 0
613 END IF
614*
615 IF( ( .NOT.wantz ) .OR. ( valeig .AND. ( .NOT.lquery ) ) )
616 $ THEN
617 lwmin = n + max( nb*( np0+1 ), 3 )
618 lwopt = lwmin
619 lrwmin = 5*nn + 4*n
620 IF( wantz ) THEN
621 mq0 = numroc( max( n, nb, 2 ), nb, 0, 0, npcol )
622 lrwopt = 4*n + max( 5*nn, np0*mq0 ) +
623 $ iceil( n, nprow*npcol )*nn
624 ELSE
625 lrwopt = lrwmin
626 END IF
627 neig = 0
628 ELSE
629 IF( alleig .OR. valeig ) THEN
630 neig = n
631 ELSE IF( indeig ) THEN
632 neig = iu - il + 1
633 END IF
634 mq0 = numroc( max( neig, nb, 2 ), nb, 0, 0, npcol )
635 nq0 = numroc( nn, nb, 0, 0, npcol )
636 lwmin = n + ( np0+nq0+nb )*nb
637 lrwmin = 4*n + max( 5*nn, np0*mq0 ) +
638 $ iceil( neig, nprow*npcol )*nn
639 lrwopt = lrwmin
640 lwopt = lwmin
641*
642 END IF
643*
644* Compute how much workspace is needed to use the
645* new TRD code
646*
647 anb = pjlaenv( ictxt, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
648 sqnpc = int( sqrt( dble( nprow*npcol ) ) )
649 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
650 nhetrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+2 )*nps
651 lwopt = max( lwopt, n+nhetrd_lwopt )
652*
653 END IF
654 IF( info.EQ.0 ) THEN
655 IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
656 rwork( 1 ) = abstol
657 IF( valeig ) THEN
658 rwork( 2 ) = vl
659 rwork( 3 ) = vu
660 ELSE
661 rwork( 2 ) = zero
662 rwork( 3 ) = zero
663 END IF
664 CALL dgebs2d( ictxt, 'ALL', ' ', 3, 1, rwork, 3 )
665 ELSE
666 CALL dgebr2d( ictxt, 'ALL', ' ', 3, 1, rwork, 3, 0, 0 )
667 END IF
668 IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
669 info = -1
670 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
671 info = -2
672 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
673 info = -3
674 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
675 info = -10
676 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
677 $ THEN
678 info = -11
679 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
680 $ THEN
681 info = -12
682 ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
683 info = -23
684 ELSE IF( lrwork.LT.lrwmin .AND. lrwork.NE.-1 ) THEN
685 info = -25
686 ELSE IF( liwork.LT.liwmin .AND. liwork.NE.-1 ) THEN
687 info = -27
688 ELSE IF( valeig .AND. ( abs( rwork( 2 )-vl ).GT.five*eps*
689 $ abs( vl ) ) ) THEN
690 info = -9
691 ELSE IF( valeig .AND. ( abs( rwork( 3 )-vu ).GT.five*eps*
692 $ abs( vu ) ) ) THEN
693 info = -10
694 ELSE IF( abs( rwork( 1 )-abstol ).GT.five*eps*
695 $ abs( abstol ) ) THEN
696 info = -13
697 ELSE IF( iroffa.NE.0 ) THEN
698 info = -6
699 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
700 info = -( 800+nb_ )
701 END IF
702 IF( wantz ) THEN
703 IF( iroffa.NE.iroffz ) THEN
704 info = -19
705 ELSE IF( iarow.NE.izrow ) THEN
706 info = -19
707 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
708 info = -( 2100+m_ )
709 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
710 info = -( 2100+n_ )
711 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
712 info = -( 2100+mb_ )
713 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
714 info = -( 2100+nb_ )
715 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
716 info = -( 2100+rsrc_ )
717 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
718 info = -( 2100+csrc_ )
719 ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
720 info = -( 2100+ctxt_ )
721 END IF
722 END IF
723 END IF
724 IF( wantz ) THEN
725 idum1( 1 ) = ichar( 'V' )
726 ELSE
727 idum1( 1 ) = ichar( 'N' )
728 END IF
729 idum2( 1 ) = 1
730 IF( lower ) THEN
731 idum1( 2 ) = ichar( 'L' )
732 ELSE
733 idum1( 2 ) = ichar( 'U' )
734 END IF
735 idum2( 2 ) = 2
736 IF( alleig ) THEN
737 idum1( 3 ) = ichar( 'A' )
738 ELSE IF( indeig ) THEN
739 idum1( 3 ) = ichar( 'I' )
740 ELSE
741 idum1( 3 ) = ichar( 'V' )
742 END IF
743 idum2( 3 ) = 3
744 IF( lquery ) THEN
745 idum1( 4 ) = -1
746 ELSE
747 idum1( 4 ) = 1
748 END IF
749 idum2( 4 ) = 4
750 IF( wantz ) THEN
751 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
752 $ jz, descz, 21, 4, idum1, idum2, info )
753 ELSE
754 CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
755 $ idum2, info )
756 END IF
757 work( 1 ) = dcmplx( lwopt )
758 rwork( 1 ) = dble( lrwopt )
759 iwork( 1 ) = liwmin
760 END IF
761*
762 IF( info.NE.0 ) THEN
763 CALL pxerbla( ictxt, 'PZHEEVX', -info )
764 RETURN
765 ELSE IF( lquery ) THEN
766 RETURN
767 END IF
768*
769* Quick return if possible
770*
771 IF( quickreturn ) THEN
772 IF( wantz ) THEN
773 nz = 0
774 iclustr( 1 ) = 0
775 END IF
776 m = 0
777 work( 1 ) = dcmplx( lwopt )
778 rwork( 1 ) = dble( lrwmin )
779 iwork( 1 ) = liwmin
780 RETURN
781 END IF
782*
783* Scale matrix to allowable range, if necessary.
784*
785 abstll = abstol
786 iscale = 0
787 IF( valeig ) THEN
788 vll = vl
789 vuu = vu
790 ELSE
791 vll = zero
792 vuu = zero
793 END IF
794*
795 anrm = pzlanhe( 'M', uplo, n, a, ia, ja, desca,
796 $ rwork( indrwork ) )
797*
798 IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
799 iscale = 1
800 sigma = rmin / anrm
801 anrm = anrm*sigma
802 ELSE IF( anrm.GT.rmax ) THEN
803 iscale = 1
804 sigma = rmax / anrm
805 anrm = anrm*sigma
806 END IF
807*
808 IF( iscale.EQ.1 ) THEN
809 CALL pzlascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
810 IF( abstol.GT.0 )
811 $ abstll = abstol*sigma
812 IF( valeig ) THEN
813 vll = vl*sigma
814 vuu = vu*sigma
815 IF( vuu.EQ.vll ) THEN
816 vuu = vuu + 2*max( abs( vuu )*eps, safmin )
817 END IF
818 END IF
819 END IF
820*
821* Call PZHENTRD to reduce Hermitian matrix to tridiagonal form.
822*
823 lallwork = llrwork
824*
825 CALL pzhentrd( uplo, n, a, ia, ja, desca, rwork( indd ),
826 $ rwork( inde ), work( indtau ), work( indwork ),
827 $ llwork, rwork( indrwork ), llrwork, iinfo )
828*
829*
830* Copy the values of D, E to all processes
831*
832* Here PxLARED1D is used to redistribute the tridiagonal matrix.
833* PxLARED1D, however, doesn't yet work with arbritary matrix
834* distributions so we have PxELGET as a backup.
835*
836 offset = 0
837 IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
838 $ THEN
839 CALL pdlared1d( n, ia, ja, desca, rwork( indd ),
840 $ rwork( indd2 ), rwork( indrwork ), llrwork )
841*
842 CALL pdlared1d( n, ia, ja, desca, rwork( inde ),
843 $ rwork( inde2 ), rwork( indrwork ), llrwork )
844 IF( .NOT.lower )
845 $ offset = 1
846 ELSE
847 DO 10 i = 1, n
848 CALL pzelget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
849 $ i+ja-1, desca )
850 rwork( indd2+i-1 ) = dble( work( indd2+i-1 ) )
851 10 CONTINUE
852 IF( lsame( uplo, 'U' ) ) THEN
853 DO 20 i = 1, n - 1
854 CALL pzelget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
855 $ i+ja, desca )
856 rwork( inde2+i-1 ) = dble( work( inde2+i-1 ) )
857 20 CONTINUE
858 ELSE
859 DO 30 i = 1, n - 1
860 CALL pzelget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
861 $ i+ja-1, desca )
862 rwork( inde2+i-1 ) = dble( work( inde2+i-1 ) )
863 30 CONTINUE
864 END IF
865 END IF
866*
867* Call PDSTEBZ and, if eigenvectors are desired, PZSTEIN.
868*
869 IF( wantz ) THEN
870 order = 'B'
871 ELSE
872 order = 'E'
873 END IF
874*
875 CALL pdstebz( ictxt, range, order, n, vll, vuu, il, iu, abstll,
876 $ rwork( indd2 ), rwork( inde2+offset ), m, nsplit, w,
877 $ iwork( indibl ), iwork( indisp ), rwork( indrwork ),
878 $ llrwork, iwork( 1 ), isizestebz, iinfo )
879*
880*
881* IF PDSTEBZ fails, the error propogates to INFO, but
882* we do not propogate the eigenvalue(s) which failed because:
883* 1) This should never happen if the user specifies
884* ABSTOL = 2 * PDLAMCH( 'U' )
885* 2) PDSTEIN will confirm/deny whether the eigenvalues are
886* close enough.
887*
888 IF( iinfo.NE.0 ) THEN
889 info = info + ierrebz
890 DO 40 i = 1, m
891 iwork( indibl+i-1 ) = abs( iwork( indibl+i-1 ) )
892 40 CONTINUE
893 END IF
894 IF( wantz ) THEN
895*
896 IF( valeig ) THEN
897*
898* Compute the maximum number of eigenvalues that we can
899* compute in the
900* workspace that we have, and that we can store in Z.
901*
902* Loop through the possibilities looking for the largest
903* NZ that we can feed to PZSTEIN and PZUNMTR
904*
905* Since all processes must end up with the same value
906* of NZ, we first compute the minimum of LALLWORK
907*
908 CALL igamn2d( ictxt, 'A', ' ', 1, 1, lallwork, 1, 1, 1, -1,
909 $ -1, -1 )
910*
911 maxeigs = descz( n_ )
912*
913 DO 50 nz = min( maxeigs, m ), 0, -1
914 mq0 = numroc( nz, nb, 0, 0, npcol )
915 sizestein = iceil( nz, nprocs )*n + max( 5*n, np0*mq0 )
916 sizeheevx = sizestein
917 IF( sizeheevx.LE.lallwork )
918 $ GO TO 60
919 50 CONTINUE
920 60 CONTINUE
921 ELSE
922 nz = m
923 END IF
924 nz = max( nz, 0 )
925 IF( nz.NE.m ) THEN
926 info = info + ierrspc
927*
928 DO 70 i = 1, m
929 ifail( i ) = 0
930 70 CONTINUE
931*
932* The following code handles a rare special case
933* - NZ .NE. M means that we don't have enough room to store
934* all the vectors.
935* - NSPLIT .GT. 1 means that the matrix split
936* In this case, we cannot simply take the first NZ eigenvalues
937* because PDSTEBZ sorts the eigenvalues by block when
938* a split occurs. So, we have to make another call to
939* PDSTEBZ with a new upper limit - VUU.
940*
941 IF( nsplit.GT.1 ) THEN
942 CALL dlasrt( 'I', m, w, iinfo )
943 nzz = 0
944 IF( nz.GT.0 ) THEN
945*
946 vuu = w( nz ) - ten*( eps*anrm+safmin )
947 IF( vll.GE.vuu ) THEN
948 nzz = 0
949 ELSE
950 CALL pdstebz( ictxt, range, order, n, vll, vuu, il,
951 $ iu, abstll, rwork( indd2 ),
952 $ rwork( inde2+offset ), nzz, nsplit,
953 $ w, iwork( indibl ), iwork( indisp ),
954 $ rwork( indrwork ), llrwork,
955 $ iwork( 1 ), isizestebz, iinfo )
956 END IF
957*
958 IF( mod( info / ierrebz, 1 ).EQ.0 ) THEN
959 IF( nzz.GT.nz .OR. iinfo.NE.0 ) THEN
960 info = info + ierrebz
961 END IF
962 END IF
963 END IF
964 nz = min( nz, nzz )
965*
966 END IF
967 END IF
968 CALL pzstein( n, rwork( indd2 ), rwork( inde2+offset ), nz, w,
969 $ iwork( indibl ), iwork( indisp ), orfac, z, iz,
970 $ jz, descz, rwork( indrwork ), lallwork,
971 $ iwork( 1 ), isizestein, ifail, iclustr, gap,
972 $ iinfo )
973*
974 IF( iinfo.GE.nz+1 )
975 $ info = info + ierrcls
976 IF( mod( iinfo, nz+1 ).NE.0 )
977 $ info = info + ierrein
978*
979* Z = Q * Z
980*
981*
982 IF( nz.GT.0 ) THEN
983 CALL pzunmtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
984 $ work( indtau ), z, iz, jz, descz,
985 $ work( indwork ), llwork, iinfo )
986 END IF
987*
988 END IF
989*
990* If matrix was scaled, then rescale eigenvalues appropriately.
991*
992 IF( iscale.EQ.1 ) THEN
993 CALL dscal( m, one / sigma, w, 1 )
994 END IF
995*
996 work( 1 ) = dcmplx( lwopt )
997 rwork( 1 ) = dble( lrwopt )
998 iwork( 1 ) = liwmin
999*
1000 RETURN
1001*
1002* End of PZHEEVX
1003*
1004 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pdlared1d(n, ia, ja, desc, bycol, byall, work, lwork)
Definition pdlared1d.f:2
subroutine pdstebz(ictxt, range, order, n, vl, vu, il, iu, abstol, d, e, m, nsplit, w, iblock, isplit, work, lwork, iwork, liwork, info)
Definition pdstebz.f:4
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzelget(scope, top, alpha, a, ia, ja, desca)
Definition pzelget.f:2
subroutine pzheevx(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, abstol, m, nz, w, orfac, z, iz, jz, descz, work, lwork, rwork, lrwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pzheevx.f:5
subroutine pzhentrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, rwork, lrwork, info)
Definition pzhentrd.f:3
subroutine pzlascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pzlascl.f:3
subroutine pzstein(n, d, e, m, w, iblock, isplit, orfac, z, iz, jz, descz, work, lwork, iwork, liwork, ifail, iclustr, gap, info)
Definition pzstein.f:4
subroutine pzunmtr(side, uplo, trans, m, n, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition pzunmtr.f:3