ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcheevd.f
Go to the documentation of this file.
1  SUBROUTINE pcheevd( JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ,
2  $ DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK,
3  $ LIWORK, INFO )
4 *
5 * -- ScaLAPACK routine (version 1.7) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7 * and University of California, Berkeley.
8 * March 25, 2002
9 *
10 * .. Scalar Arguments ..
11  CHARACTER JOBZ, UPLO
12  INTEGER IA, INFO, IZ, JA, JZ, LIWORK, LRWORK, LWORK, N
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCA( * ), DESCZ( * ), IWORK( * )
16  REAL RWORK( * ), W( * )
17  COMPLEX A( * ), WORK( * ), Z( * )
18 * ..
19 *
20 * Purpose
21 * =======
22 *
23 * PCHEEVD computes all the eigenvalues and eigenvectors of a Hermitian
24 * matrix A by using a divide and conquer algorithm.
25 *
26 * Arguments
27 * =========
28 *
29 * NP = the number of rows local to a given process.
30 * NQ = the number of columns local to a given process.
31 *
32 * JOBZ (input) CHARACTER*1
33 * = 'N': Compute eigenvalues only; (NOT IMPLEMENTED YET)
34 * = 'V': Compute eigenvalues and eigenvectors.
35 *
36 * UPLO (global input) CHARACTER*1
37 * Specifies whether the upper or lower triangular part of the
38 * symmetric matrix A is stored:
39 * = 'U': Upper triangular
40 * = 'L': Lower triangular
41 *
42 * N (global input) INTEGER
43 * The number of rows and columns of the matrix A. N >= 0.
44 *
45 * A (local input/workspace) block cyclic COMPLEX array,
46 * global dimension (N, N), local dimension ( LLD_A,
47 * LOCc(JA+N-1) )
48 *
49 * On entry, the symmetric matrix A. If UPLO = 'U', only the
50 * upper triangular part of A is used to define the elements of
51 * the symmetric matrix. If UPLO = 'L', only the lower
52 * triangular part of A is used to define the elements of the
53 * symmetric matrix.
54 *
55 * On exit, the lower triangle (if UPLO='L') or the upper
56 * triangle (if UPLO='U') of A, including the diagonal, is
57 * destroyed.
58 *
59 * IA (global input) INTEGER
60 * A's global row index, which points to the beginning of the
61 * submatrix which is to be operated on.
62 *
63 * JA (global input) INTEGER
64 * A's global column index, which points to the beginning of
65 * the submatrix which is to be operated on.
66 *
67 * DESCA (global and local input) INTEGER array of dimension DLEN_.
68 * The array descriptor for the distributed matrix A.
69 * If DESCA( CTXT_ ) is incorrect, PCHEEVD cannot guarantee
70 * correct error reporting.
71 *
72 * W (global output) REAL array, dimension (N)
73 * If INFO=0, the eigenvalues in ascending order.
74 *
75 * Z (local output) COMPLEX array,
76 * global dimension (N, N),
77 * local dimension ( LLD_Z, LOCc(JZ+N-1) )
78 * Z contains the orthonormal eigenvectors of the matrix A.
79 *
80 * IZ (global input) INTEGER
81 * Z's global row index, which points to the beginning of the
82 * submatrix which is to be operated on.
83 *
84 * JZ (global input) INTEGER
85 * Z's global column index, which points to the beginning of
86 * the submatrix which is to be operated on.
87 *
88 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
89 * The array descriptor for the distributed matrix Z.
90 * DESCZ( CTXT_ ) must equal DESCA( CTXT_ )
91 *
92 * WORK (local workspace/output) COMPLEX array,
93 * dimension (LWORK)
94 * On output, WORK(1) returns the workspace needed for the
95 * computation.
96 *
97 * LWORK (local input) INTEGER
98 * If eigenvectors are requested:
99 * LWORK = N + ( NP0 + MQ0 + NB ) * NB,
100 * with NP0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPROW )
101 * MQ0 = NUMROC( MAX( N, NB, 2 ), NB, 0, 0, NPCOL )
102 *
103 * If LWORK = -1, then LWORK is global input and a workspace
104 * query is assumed; the routine calculates the size for all
105 * work arrays. Each of these values is returned in the first
106 * entry of the corresponding work array, and no error message
107 * is issued by PXERBLA.
108 *
109 * RWORK (local workspace/output) REAL array,
110 * dimension (LRWORK)
111 * On output RWORK(1) returns the real workspace needed to
112 * guarantee completion. If the input parameters are incorrect,
113 * RWORK(1) may also be incorrect.
114 *
115 * LRWORK (local input) INTEGER
116 * Size of RWORK array.
117 * LRWORK >= 1 + 9*N + 3*NP*NQ,
118 * NP = NUMROC( N, NB, MYROW, IAROW, NPROW )
119 * NQ = NUMROC( N, NB, MYCOL, IACOL, NPCOL )
120 *
121 * IWORK (local workspace/output) INTEGER array, dimension (LIWORK)
122 * On output IWORK(1) returns the integer workspace needed.
123 *
124 * LIWORK (input) INTEGER
125 * The dimension of the array IWORK.
126 * LIWORK = 7*N + 8*NPCOL + 2
127 *
128 * INFO (global output) INTEGER
129 * = 0: successful exit
130 * < 0: If the i-th argument is an array and the j-entry had
131 * an illegal value, then INFO = -(i*100+j), if the i-th
132 * argument is a scalar and had an illegal value, then
133 * INFO = -i.
134 * > 0: If INFO = 1 through N, the i(th) eigenvalue did not
135 * converge in PSLAED3.
136 *
137 * Alignment requirements
138 * ======================
139 *
140 * The distributed submatrices sub( A ), sub( Z ) must verify
141 * some alignment properties, namely the following expression
142 * should be true:
143 * ( MB_A.EQ.NB_A.EQ.MB_Z.EQ.NB_Z .AND. IROFFA.EQ.ICOFFA .AND.
144 * IROFFA.EQ.0 .AND.IROFFA.EQ.IROFFZ. AND. IAROW.EQ.IZROW)
145 * with IROFFA = MOD( IA-1, MB_A )
146 * and ICOFFA = MOD( JA-1, NB_A ).
147 *
148 * Further Details
149 * ======= =======
150 *
151 * Contributed by Francoise Tisseur, University of Manchester.
152 *
153 * Reference: F. Tisseur and J. Dongarra, "A Parallel Divide and
154 * Conquer Algorithm for the Symmetric Eigenvalue Problem
155 * on Distributed Memory Architectures",
156 * SIAM J. Sci. Comput., 6:20 (1999), pp. 2223--2236.
157 * (see also LAPACK Working Note 132)
158 * http://www.netlib.org/lapack/lawns/lawn132.ps
159 *
160 * =====================================================================
161 *
162 * .. Parameters ..
163  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
164  $ MB_, NB_, RSRC_, CSRC_, LLD_
165  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
166  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
167  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
168  REAL ZERO, ONE
169  PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0 )
170  COMPLEX CZERO, CONE
171  parameter( czero = ( 0.0e+0, 0.0e+0 ),
172  $ cone = ( 1.0e+0, 0.0e+0 ) )
173 * ..
174 * .. Local Scalars ..
175  LOGICAL LOWER, LQUERY
176  INTEGER CSRC_A, I, IACOL, IAROW, ICOFFA, IINFO, IIZ,
177  $ indd, inde, inde2, indrwork, indtau, indwork,
178  $ indz, ipr, ipz, iroffa, iroffz, iscale, izcol,
179  $ izrow, j, jjz, ldr, ldz, liwmin, llrwork,
180  $ llwork, lrwmin, lwmin, mb_a, mycol, myrow, nb,
181  $ nb_a, nn, np0, npcol, nprow, nq, nq0, offset,
182  $ rsrc_a
183  REAL ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
184  $ SMLNUM
185 * ..
186 * .. Local Arrays ..
187  INTEGER DESCRZ( 9 ), IDUM1( 2 ), IDUM2( 2 )
188 * ..
189 * .. External Functions ..
190  LOGICAL LSAME
191  INTEGER INDXG2L, INDXG2P, NUMROC
192  REAL PCLANHE, PSLAMCH
193  EXTERNAL lsame, indxg2l, indxg2p, numroc, pclanhe,
194  $ pslamch
195 * ..
196 * .. External Subroutines ..
197  EXTERNAL blacs_gridinfo, chk1mat, descinit, infog2l,
200  $ sscal
201 * ..
202 * .. Intrinsic Functions ..
203  INTRINSIC cmplx, ichar, max, min, mod, real, sqrt
204 * ..
205 * .. Executable Statements ..
206 * This is just to keep ftnchek and toolpack/1 happy
207  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
208  $ rsrc_.LT.0 )RETURN
209 *
210  info = 0
211 *
212 * Quick return
213 *
214  IF( n.EQ.0 )
215  $ RETURN
216 *
217 * Test the input arguments.
218 *
219  ictxt = desca( ctxt_ )
220  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
221 *
222  IF( nprow.EQ.-1 ) THEN
223  info = -( 700+ctxt_ )
224  ELSE
225  CALL chk1mat( n, 3, n, 3, ia, ja, desca, 7, info )
226  CALL chk1mat( n, 3, n, 3, iz, jz, descz, 12, info )
227  IF( info.EQ.0 ) THEN
228  lower = lsame( uplo, 'L' )
229  nb_a = desca( nb_ )
230  mb_a = desca( mb_ )
231  nb = nb_a
232  rsrc_a = desca( rsrc_ )
233  csrc_a = desca( csrc_ )
234  iroffa = mod( ia-1, mb_a )
235  icoffa = mod( ja-1, nb_a )
236  iarow = indxg2p( ia, nb_a, myrow, rsrc_a, nprow )
237  iacol = indxg2p( ja, mb_a, mycol, csrc_a, npcol )
238  np0 = numroc( n, nb, myrow, iarow, nprow )
239  nq0 = numroc( n, nb, mycol, iacol, npcol )
240  iroffz = mod( iz-1, mb_a )
241  CALL infog2l( iz, jz, descz, nprow, npcol, myrow, mycol,
242  $ iiz, jjz, izrow, izcol )
243  lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 .OR. lrwork.EQ.-1 )
244 *
245 * Compute the total amount of space needed
246 *
247  nn = max( n, nb, 2 )
248  nq = numroc( nn, nb, 0, 0, npcol )
249  lwmin = n + ( np0+nq+nb )*nb
250  lrwmin = 1 + 9*n + 3*np0*nq0
251  liwmin = 7*n + 8*npcol + 2
252  work( 1 ) = cmplx( lwmin )
253  rwork( 1 ) = real( lrwmin )
254  iwork( 1 ) = liwmin
255  IF( .NOT.lsame( jobz, 'V' ) ) THEN
256  info = -1
257  ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
258  info = -2
259  ELSE IF( lwork.LT.lwmin .AND. lwork.NE.-1 ) THEN
260  info = -14
261  ELSE IF( lrwork.LT.lrwmin .AND. lrwork.NE.-1 ) THEN
262  info = -16
263  ELSE IF( iroffa.NE.0 ) THEN
264  info = -4
265  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
266  info = -( 700+nb_ )
267  ELSE IF( iroffa.NE.iroffz ) THEN
268  info = -10
269  ELSE IF( iarow.NE.izrow ) THEN
270  info = -10
271  ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
272  info = -( 1200+m_ )
273  ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
274  info = -( 1200+n_ )
275  ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
276  info = -( 1200+mb_ )
277  ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
278  info = -( 1200+nb_ )
279  ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
280  info = -( 1200+rsrc_ )
281  ELSE IF( desca( ctxt_ ).NE.descz( ctxt_ ) ) THEN
282  info = -( 1200+ctxt_ )
283  END IF
284  END IF
285  IF( lower ) THEN
286  idum1( 1 ) = ichar( 'L' )
287  ELSE
288  idum1( 1 ) = ichar( 'U' )
289  END IF
290  idum2( 1 ) = 2
291  IF( lwork.EQ.-1 ) THEN
292  idum1( 2 ) = -1
293  ELSE
294  idum1( 2 ) = 1
295  END IF
296  idum2( 2 ) = 14
297  CALL pchk2mat( n, 3, n, 3, ia, ja, desca, 7, n, 3, n, 3, iz,
298  $ jz, descz, 12, 2, idum1, idum2, info )
299  END IF
300 *
301  IF( info.NE.0 ) THEN
302  CALL pxerbla( desca( ctxt_ ), 'PCHEEVD', -info )
303  RETURN
304  ELSE IF( lquery ) THEN
305  RETURN
306  END IF
307 *
308 * Get machine constants.
309 *
310  safmin = pslamch( desca( ctxt_ ), 'Safe minimum' )
311  eps = pslamch( desca( ctxt_ ), 'Precision' )
312  smlnum = safmin / eps
313  bignum = one / smlnum
314  rmin = sqrt( smlnum )
315  rmax = min( sqrt( bignum ), one / sqrt( sqrt( safmin ) ) )
316 *
317 * Set up pointers into the WORK array
318 *
319  indtau = 1
320  indwork = indtau + n
321  llwork = lwork - indwork + 1
322 *
323 * Set up pointers into the RWORK array
324 *
325  inde = 1
326  indd = inde + n
327  inde2 = indd + n
328  indrwork = inde2 + n
329  llrwork = lrwork - indrwork + 1
330 *
331 * Scale matrix to allowable range, if necessary.
332 *
333  iscale = 0
334 *
335  anrm = pclanhe( 'M', uplo, n, a, ia, ja, desca,
336  $ rwork( indrwork ) )
337 *
338 *
339  IF( anrm.GT.zero .AND. anrm.LT.rmin ) THEN
340  iscale = 1
341  sigma = rmin / anrm
342  ELSE IF( anrm.GT.rmax ) THEN
343  iscale = 1
344  sigma = rmax / anrm
345  END IF
346 *
347  IF( iscale.EQ.1 ) THEN
348  CALL pclascl( uplo, one, sigma, n, n, a, ia, ja, desca, iinfo )
349  END IF
350 *
351 * Reduce Hermitian matrix to tridiagonal form.
352 *
353  CALL pchetrd( uplo, n, a, ia, ja, desca, rwork( indd ),
354  $ rwork( inde2 ), work( indtau ), work( indwork ),
355  $ llwork, iinfo )
356 *
357 * Copy the values of D, E to all processes
358 *
359 * Here PxLARED1D is used to redistribute the tridiagonal matrix.
360 * PxLARED1D, however, doesn't yet work with arbritary matrix
361 * distributions so we have PxELGET as a backup.
362 *
363  offset = 0
364  IF( ia.EQ.1 .AND. ja.EQ.1 .AND. rsrc_a.EQ.0 .AND. csrc_a.EQ.0 )
365  $ THEN
366  CALL pslared1d( n, ia, ja, desca, rwork( indd ), w,
367  $ rwork( indrwork ), llrwork )
368 *
369  CALL pslared1d( n, ia, ja, desca, rwork( inde2 ),
370  $ rwork( inde ), rwork( indrwork ), llrwork )
371  IF( .NOT.lower )
372  $ offset = 1
373  ELSE
374  DO 10 i = 1, n
375  CALL pcelget( 'A', ' ', work( indwork ), a, i+ia-1, i+ja-1,
376  $ desca )
377  w( i ) = real( work( indwork ) )
378  10 CONTINUE
379  IF( lsame( uplo, 'U' ) ) THEN
380  DO 20 i = 1, n - 1
381  CALL pcelget( 'A', ' ', work( indwork ), a, i+ia-1, i+ja,
382  $ desca )
383  rwork( inde+i-1 ) = real( work( indwork ) )
384  20 CONTINUE
385  ELSE
386  DO 30 i = 1, n - 1
387  CALL pcelget( 'A', ' ', work( indwork ), a, i+ia, i+ja-1,
388  $ desca )
389  rwork( inde+i-1 ) = real( work( indwork ) )
390  30 CONTINUE
391  END IF
392  END IF
393 *
394 * Call PSSTEDC to compute eigenvalues and eigenvectors.
395 *
396  indz = inde + n
397  indrwork = indz + np0*nq0
398  llrwork = lrwork - indrwork + 1
399  ldr = max( 1, np0 )
400  CALL descinit( descrz, descz( m_ ), descz( n_ ), descz( mb_ ),
401  $ descz( nb_ ), descz( rsrc_ ), descz( csrc_ ),
402  $ descz( ctxt_ ), ldr, info )
403  CALL pclaset( 'Full', n, n, czero, cone, z, iz, jz, descz )
404  CALL pslaset( 'Full', n, n, zero, one, rwork( indz ), 1, 1,
405  $ descrz )
406  CALL psstedc( 'I', n, w, rwork( inde+offset ), rwork( indz ), iz,
407  $ jz, descrz, rwork( indrwork ), llrwork, iwork,
408  $ liwork, iinfo )
409 *
410  ldz = descz( lld_ )
411  ldr = descrz( lld_ )
412  iiz = indxg2l( iz, nb, myrow, myrow, nprow )
413  jjz = indxg2l( jz, nb, mycol, mycol, npcol )
414  ipz = iiz + ( jjz-1 )*ldz
415  ipr = indz - 1 + iiz + ( jjz-1 )*ldr
416  DO 50 j = 0, nq0 - 1
417  DO 40 i = 0, np0 - 1
418  z( ipz+i+j*ldz ) = rwork( ipr+i+j*ldr )
419  40 CONTINUE
420  50 CONTINUE
421 *
422 * Z = Q * Z
423 *
424  CALL pcunmtr( 'L', uplo, 'N', n, n, a, ia, ja, desca,
425  $ work( indtau ), z, iz, jz, descz, work( indwork ),
426  $ llwork, iinfo )
427 *
428 * If matrix was scaled, then rescale eigenvalues appropriately.
429 *
430  IF( iscale.EQ.1 ) THEN
431  CALL sscal( n, one / sigma, w, 1 )
432  END IF
433 *
434  work( 1 ) = cmplx( lwmin )
435  rwork( 1 ) = real( lrwmin )
436  iwork( 1 ) = liwmin
437 *
438  RETURN
439 *
440 * End of PCHEEVD
441 *
442  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
psstedc
subroutine psstedc(COMPZ, N, D, E, Q, IQ, JQ, DESCQ, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: psstedc.f:3
pcheevd
subroutine pcheevd(JOBZ, UPLO, N, A, IA, JA, DESCA, W, Z, IZ, JZ, DESCZ, WORK, LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO)
Definition: pcheevd.f:4
pcelget
subroutine pcelget(SCOPE, TOP, ALPHA, A, IA, JA, DESCA)
Definition: pcelget.f:2
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
pslared1d
subroutine pslared1d(N, IA, JA, DESC, BYCOL, BYALL, WORK, LWORK)
Definition: pslared1d.f:2
descinit
subroutine descinit(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD, INFO)
Definition: descinit.f:3
pclaset
subroutine pclaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pcblastst.f:7508
pcunmtr
subroutine pcunmtr(SIDE, UPLO, TRANS, M, N, A, IA, JA, DESCA, TAU, C, IC, JC, DESCC, WORK, LWORK, INFO)
Definition: pcunmtr.f:3
pchetrd
subroutine pchetrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pchetrd.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
pslaset
subroutine pslaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: psblastst.f:6863
pclascl
subroutine pclascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pclascl.f:3
min
#define min(A, B)
Definition: pcgemr.c:181