SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pssyevr.f
Go to the documentation of this file.
1 SUBROUTINE pssyevr( JOBZ, RANGE, UPLO, N, A, IA, JA,
2 $ DESCA, VL, VU, IL, IU, M, NZ, W, Z, IZ,
3 $ JZ, DESCZ, WORK, LWORK, IWORK, LIWORK,
4 $ INFO )
5
6 IMPLICIT NONE
7*
8* -- ScaLAPACK routine (version 2.0.2) --
9* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
10* May 1 2012
11*
12* .. Scalar Arguments ..
13 CHARACTER JOBZ, RANGE, UPLO
14 INTEGER IA, IL, INFO, IU, IZ, JA, JZ, LIWORK, LWORK, M,
15 $ N, NZ
16 REAL VL, VU
17* ..
18* .. Array Arguments ..
19 INTEGER DESCA( * ), DESCZ( * ), IWORK( * )
20 REAL A( * ), W( * ), WORK( * ), Z( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PSSYEVR computes selected eigenvalues and, optionally, eigenvectors
27* of a real symmetric matrix A distributed in 2D blockcyclic format
28* by calling the recommended sequence of ScaLAPACK routines.
29*
30* First, the matrix A is reduced to real symmetric tridiagonal form.
31* Then, the eigenproblem is solved using the parallel MRRR algorithm.
32* Last, if eigenvectors have been computed, a backtransformation is done.
33*
34* Upon successful completion, each processor stores a copy of all computed
35* eigenvalues in W. The eigenvector matrix Z is stored in
36* 2D blockcyclic format distributed over all processors.
37*
38* Note that subsets of eigenvalues/vectors can be selected by
39* specifying a range of values or a range of indices for the desired
40* eigenvalues.
41*
42* For constructive feedback and comments, please contact cvoemel@lbl.gov
43* C. Voemel
44*
45* Arguments
46* =========
47*
48* JOBZ (global input) CHARACTER*1
49* Specifies whether or not to compute the eigenvectors:
50* = 'N': Compute eigenvalues only.
51* = 'V': Compute eigenvalues and eigenvectors.
52*
53* RANGE (global input) CHARACTER*1
54* = 'A': all eigenvalues will be found.
55* = 'V': all eigenvalues in the interval [VL,VU] will be found.
56* = 'I': the IL-th through IU-th eigenvalues will be found.
57*
58* UPLO (global input) CHARACTER*1
59* Specifies whether the upper or lower triangular part of the
60* symmetric matrix A is stored:
61* = 'U': Upper triangular
62* = 'L': Lower triangular
63*
64* N (global input) INTEGER
65* The number of rows and columns of the matrix A. N >= 0
66*
67* A (local input/workspace) 2D block cyclic REAL array,
68* global dimension (N, N),
69* local dimension ( LLD_A, LOCc(JA+N-1) ),
70* (see Notes below for more detailed explanation of 2d arrays)
71*
72* On entry, the symmetric matrix A. If UPLO = 'U', only the
73* upper triangular part of A is used to define the elements of
74* the symmetric matrix. If UPLO = 'L', only the lower
75* triangular part of A is used to define the elements of the
76* symmetric matrix.
77*
78* On exit, the lower triangle (if UPLO='L') or the upper
79* triangle (if UPLO='U') of A, including the diagonal, is
80* destroyed.
81*
82* IA (global input) INTEGER
83* A's global row index, which points to the beginning of the
84* submatrix which is to be operated on.
85* It should be set to 1 when operating on a full matrix.
86*
87* JA (global input) INTEGER
88* A's global column index, which points to the beginning of
89* the submatrix which is to be operated on.
90* It should be set to 1 when operating on a full matrix.
91*
92* DESCA (global and local input) INTEGER array of dimension DLEN=9.
93* The array descriptor for the distributed matrix A.
94* The descriptor stores details about the 2D block-cyclic
95* storage, see the notes below.
96* If DESCA is incorrect, PSSYEVR cannot guarantee
97* correct error reporting.
98* Also note the array alignment requirements specified below.
99*
100* VL (global input) REAL
101* If RANGE='V', the lower bound of the interval to be searched
102* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
103*
104* VU (global input) REAL
105* If RANGE='V', the upper bound of the interval to be searched
106* for eigenvalues. Not referenced if RANGE = 'A' or 'I'.
107*
108* IL (global input) INTEGER
109* If RANGE='I', the index (from smallest to largest) of the
110* smallest eigenvalue to be returned. IL >= 1.
111* Not referenced if RANGE = 'A'.
112*
113* IU (global input) INTEGER
114* If RANGE='I', the index (from smallest to largest) of the
115* largest eigenvalue to be returned. min(IL,N) <= IU <= N.
116* Not referenced if RANGE = 'A'.
117*
118* M (global output) INTEGER
119* Total number of eigenvalues found. 0 <= M <= N.
120*
121* NZ (global output) INTEGER
122* Total number of eigenvectors computed. 0 <= NZ <= M.
123* The number of columns of Z that are filled.
124* If JOBZ .NE. 'V', NZ is not referenced.
125* If JOBZ .EQ. 'V', NZ = M
126*
127* W (global output) REAL array, dimension (N)
128* Upon successful exit, the first M entries contain the selected
129* eigenvalues in ascending order.
130*
131* Z (local output) REAL array,
132* global dimension (N, N),
133* local dimension ( LLD_Z, LOCc(JZ+N-1) )
134* (see Notes below for more detailed explanation of 2d arrays)
135* If JOBZ = 'V', then on normal exit the first M columns of Z
136* contain the orthonormal eigenvectors of the matrix
137* corresponding to the selected eigenvalues.
138* If JOBZ = 'N', then Z is not referenced.
139*
140* IZ (global input) INTEGER
141* Z's global row index, which points to the beginning of the
142* submatrix which is to be operated on.
143* It should be set to 1 when operating on a full matrix.
144*
145* JZ (global input) INTEGER
146* Z's global column index, which points to the beginning of
147* the submatrix which is to be operated on.
148* It should be set to 1 when operating on a full matrix.
149*
150* DESCZ (global and local input) INTEGER array of dimension DLEN_.
151* The array descriptor for the distributed matrix Z.
152* The context DESCZ( CTXT_ ) must equal DESCA( CTXT_ ).
153* Also note the array alignment requirements specified below.
154*
155* WORK (local workspace/output) REAL array,
156* dimension (LWORK)
157* On return, WORK(1) contains the optimal amount of
158* workspace required for efficient execution.
159* if JOBZ='N' WORK(1) = optimal amount of workspace
160* required to compute the eigenvalues.
161* if JOBZ='V' WORK(1) = optimal amount of workspace
162* required to compute eigenvalues and eigenvectors.
163*
164* LWORK (local input) INTEGER
165* Size of WORK, must be at least 3.
166* See below for definitions of variables used to define LWORK.
167* If no eigenvectors are requested (JOBZ = 'N') then
168* LWORK >= 2 + 5*N + MAX( 12 * NN, NB * ( NP0 + 1 ) )
169* If eigenvectors are requested (JOBZ = 'V' ) then
170* the amount of workspace required is:
171* LWORK >= 2 + 5*N + MAX( 18*NN, NP0 * MQ0 + 2 * NB * NB ) +
172* (2 + ICEIL( NEIG, NPROW*NPCOL))*NN
173*
174* Variable definitions:
175* NEIG = number of eigenvectors requested
176* NB = DESCA( MB_ ) = DESCA( NB_ ) =
177* DESCZ( MB_ ) = DESCZ( NB_ )
178* NN = MAX( N, NB, 2 )
179* DESCA( RSRC_ ) = DESCA( NB_ ) = DESCZ( RSRC_ ) =
180* DESCZ( CSRC_ ) = 0
181* NP0 = NUMROC( NN, NB, 0, 0, NPROW )
182* MQ0 = NUMROC( MAX( NEIG, NB, 2 ), NB, 0, 0, NPCOL )
183* ICEIL( X, Y ) is a ScaLAPACK function returning
184* ceiling(X/Y)
185*
186* If LWORK = -1, then LWORK is global input and a workspace
187* query is assumed; the routine only calculates the size
188* required for optimal performance for all work arrays. Each of
189* these values is returned in the first entry of the
190* corresponding work arrays, and no error message is issued by
191* PXERBLA.
192* Note that in a workspace query, for performance the optimal
193* workspace LWOPT is returned rather than the minimum necessary
194* WORKSPACE LWMIN. For very small matrices, LWOPT >> LWMIN.
195*
196* IWORK (local workspace) INTEGER array
197* On return, IWORK(1) contains the amount of integer workspace
198* required.
199*
200* LIWORK (local input) INTEGER
201* size of IWORK
202*
203* Let NNP = MAX( N, NPROW*NPCOL + 1, 4 ). Then:
204* LIWORK >= 12*NNP + 2*N when the eigenvectors are desired
205* LIWORK >= 10*NNP + 2*N when only the eigenvalues have to be computed
206*
207* If LIWORK = -1, then LIWORK is global input and a workspace
208* query is assumed; the routine only calculates the minimum
209* and optimal size for all work arrays. Each of these
210* values is returned in the first entry of the corresponding
211* work array, and no error message is issued by PXERBLA.
212*
213* INFO (global output) INTEGER
214* = 0: successful exit
215* < 0: If the i-th argument is an array and the j-entry had
216* an illegal value, then INFO = -(i*100+j), if the i-th
217* argument is a scalar and had an illegal value, then
218* INFO = -i.
219*
220* Notes
221* =====
222*
223* Each global data object is described by an associated description
224* vector. This vector stores the information required to establish
225* the mapping between an object element and its corresponding process
226* and memory location.
227*
228* Let A be a generic term for any 2D block cyclicly distributed array.
229* Such a global array has an associated description vector DESCA,
230* or DESCZ for the descriptor of Z, etc.
231* The length of a ScaLAPACK descriptor is nine.
232* In the following comments, the character _ should be read as
233* "of the global array".
234*
235* NOTATION STORED IN EXPLANATION
236* --------------- -------------- --------------------------------------
237* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
238* DTYPE_A = 1.
239* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
240* the BLACS process grid A is distribu-
241* ted over. The context itself is glo-
242* bal, but the handle (the integer
243* value) may vary.
244* M_A (global) DESCA( M_ ) The number of rows in the global
245* array A.
246* N_A (global) DESCA( N_ ) The number of columns in the global
247* array A.
248* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
249* the rows of the array.
250* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
251* the columns of the array.
252* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
253* row of the array A is distributed.
254* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
255* first column of the array A is
256* distributed.
257* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
258* array. LLD_A >= MAX(1,LOCr(M_A)).
259*
260* Let K be the number of rows or columns of a distributed matrix,
261* and assume that its process grid has dimension p x q.
262* LOCr( K ) denotes the number of elements of K that a process
263* would receive if K were distributed over the p processes of its
264* process column.
265* Similarly, LOCc( K ) denotes the number of elements of K that a
266* process would receive if K were distributed over the q processes of
267* its process row.
268* The values of LOCr() and LOCc() may be determined via a call to the
269* ScaLAPACK tool function, NUMROC:
270* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
271* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
272* An upper bound for these quantities may be computed by:
273* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
274* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
275*
276* PSSYEVR assumes IEEE 754 standard compliant arithmetic.
277*
278* Alignment requirements
279* ======================
280*
281* The distributed submatrices A(IA:*, JA:*) and Z(IZ:IZ+M-1,JZ:JZ+N-1)
282* must satisfy the following alignment properties:
283*
284* 1.Identical (quadratic) dimension:
285* DESCA(M_) = DESCZ(M_) = DESCA(N_) = DESCZ(N_)
286* 2.Quadratic conformal blocking:
287* DESCA(MB_) = DESCA(NB_) = DESCZ(MB_) = DESCZ(NB_)
288* DESCA(RSRC_) = DESCZ(RSRC_)
289* 3.MOD( IA-1, MB_A ) = MOD( IZ-1, MB_Z ) = 0
290* 4.IAROW = IZROW
291*
292*
293* .. Parameters ..
294 INTEGER CTXT_, M_, N_,
295 $ MB_, NB_, RSRC_, CSRC_
296 PARAMETER ( CTXT_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
297 $ rsrc_ = 7, csrc_ = 8 )
298 REAL ZERO
299 parameter( zero = 0.0e0 )
300* ..
301* .. Local Scalars ..
302 LOGICAL ALLEIG, COLBRT, DOBCST, FINISH, FIRST, INDEIG,
303 $ LOWER, LQUERY, VALEIG, VSTART, WANTZ
304 INTEGER ANB, DOL, DOU, DSTCOL, DSTROW, EIGCNT, FRSTCL,
305 $ i, iarow, ictxt, iil, iinderr, iindwlc, iinfo,
306 $ iiu, im, indd, indd2, inde, inde2, inderr,
307 $ indilu, indrw, indtau, indwlc, indwork, ipil,
308 $ ipiu, iproc, izrow, lastcl, lengthi, lengthi2,
309 $ liwmin, llwork, lwmin, lwopt, maxcls, mq00,
310 $ mycol, myil, myiu, myproc, myrow, mz, nb,
311 $ ndepth, needil, neediu, nnp, np00, npcol,
312 $ nprocs, nprow, nps, nsplit, nsytrd_lwopt,
313 $ offset, parity, rlengthi, rlengthi2, rstarti,
314 $ size1, size2, sqnpc, srccol, srcrow, starti,
315 $ zoffset
316
317 REAL PIVMIN, SAFMIN, SCALE, VLL, VUU, WL,
318 $ WU
319*
320* .. Local Arrays ..
321 INTEGER IDUM1( 4 ), IDUM2( 4 )
322* ..
323* .. External Functions ..
324 LOGICAL LSAME
325 INTEGER ICEIL, INDXG2P, NUMROC, PJLAENV
326 REAL PSLAMCH
327 EXTERNAL iceil, indxg2p, lsame, numroc, pjlaenv,
328 $ pslamch
329* ..
330* .. External Subroutines ..
331 EXTERNAL blacs_gridinfo, chk1mat, igebr2d, igebs2d,
332 $ igerv2d, igesd2d, igsum2d, pchk1mat, pchk2mat,
334 $ pssyntrd, pxerbla, scopy, sgebr2d, sgebs2d,
335 $ sgerv2d, sgesd2d, slarrc, slasrt2,
337* ..
338* .. Intrinsic Functions ..
339 INTRINSIC abs, real, ichar, int, max, min, mod, sqrt
340* ..
341* .. Executable Statements ..
342*
343
344
345 info = 0
346***********************************************************************
347*
348* Decode character arguments to find out what the code should do
349*
350***********************************************************************
351 wantz = lsame( jobz, 'V' )
352 lower = lsame( uplo, 'L' )
353 alleig = lsame( range, 'A' )
354 valeig = lsame( range, 'V' )
355 indeig = lsame( range, 'I' )
356 lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
357
358***********************************************************************
359*
360* GET MACHINE PARAMETERS
361*
362***********************************************************************
363 ictxt = desca( ctxt_ )
364 safmin = pslamch( ictxt, 'Safe minimum' )
365
366***********************************************************************
367*
368* Set up pointers into the WORK array
369*
370***********************************************************************
371 indtau = 1
372 indd = indtau + n
373 inde = indd + n + 1
374 indd2 = inde + n + 1
375 inde2 = indd2 + n
376 indwork = inde2 + n
377 llwork = lwork - indwork + 1
378
379***********************************************************************
380*
381* BLACS PROCESSOR GRID SETUP
382*
383***********************************************************************
384 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
385
386
387 nprocs = nprow * npcol
388 myproc = myrow * npcol + mycol
389 IF( nprow.EQ.-1 ) THEN
390 info = -( 800+ctxt_ )
391 ELSE IF( wantz ) THEN
392 IF( ictxt.NE.descz( ctxt_ ) ) THEN
393 info = -( 2100+ctxt_ )
394 END IF
395 END IF
396
397***********************************************************************
398*
399* COMPUTE REAL WORKSPACE
400*
401***********************************************************************
402 IF ( alleig ) THEN
403 mz = n
404 ELSE IF ( indeig ) THEN
405 mz = iu - il + 1
406 ELSE
407* Take upper bound for VALEIG case
408 mz = n
409 END IF
410*
411 nb = desca( nb_ )
412 IF ( wantz ) THEN
413 np00 = numroc( n, nb, 0, 0, nprow )
414 mq00 = numroc( mz, nb, 0, 0, npcol )
415 indrw = indwork + max(18*n, np00*mq00 + 2*nb*nb)
416 lwmin = indrw - 1 + (iceil(mz, nprocs) + 2)*n
417 ELSE
418 indrw = indwork + 12*n
419 lwmin = indrw - 1
420 END IF
421* The code that validates the input requires 3 workspace entries
422 lwmin = max(3, lwmin)
423 lwopt = lwmin
424 anb = pjlaenv( ictxt, 3, 'PSSYTTRD', 'L', 0, 0, 0, 0 )
425 sqnpc = int( sqrt( real( nprocs ) ) )
426 nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
427 nsytrd_lwopt = 2*( anb+1 )*( 4*nps+2 ) + ( nps+4 )*nps
428 lwopt = max( lwopt, 5*n+nsytrd_lwopt )
429*
430 size1 = indrw - indwork
431
432***********************************************************************
433*
434* COMPUTE INTEGER WORKSPACE
435*
436***********************************************************************
437 nnp = max( n, nprocs+1, 4 )
438 IF ( wantz ) THEN
439 liwmin = 12*nnp + 2*n
440 ELSE
441 liwmin = 10*nnp + 2*n
442 END IF
443
444***********************************************************************
445*
446* Set up pointers into the IWORK array
447*
448***********************************************************************
449* Pointer to eigenpair distribution over processors
450 indilu = liwmin - 2*nprocs + 1
451 size2 = indilu - 2*n
452
453
454***********************************************************************
455*
456* Test the input arguments.
457*
458***********************************************************************
459 IF( info.EQ.0 ) THEN
460 CALL chk1mat( n, 4, n, 4, ia, ja, desca, 8, info )
461 IF( wantz )
462 $ CALL chk1mat( n, 4, n, 4, iz, jz, descz, 21, info )
463*
464 IF( info.EQ.0 ) THEN
465 IF( .NOT.( wantz .OR. lsame( jobz, 'N' ) ) ) THEN
466 info = -1
467 ELSE IF( .NOT.( alleig .OR. valeig .OR. indeig ) ) THEN
468 info = -2
469 ELSE IF( .NOT.( lower .OR. lsame( uplo, 'U' ) ) ) THEN
470 info = -3
471 ELSE IF( mod( ia-1, desca( mb_ ) ).NE.0 ) THEN
472 info = -6
473 ELSE IF( valeig .AND. n.GT.0 .AND. vu.LE.vl ) THEN
474 info = -10
475 ELSE IF( indeig .AND. ( il.LT.1 .OR. il.GT.max( 1, n ) ) )
476 $ THEN
477 info = -11
478 ELSE IF( indeig .AND. ( iu.LT.min( n, il ) .OR. iu.GT.n ) )
479 $ THEN
480 info = -12
481 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
482 info = -21
483 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
484 info = -23
485 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
486 info = -( 800+nb_ )
487 END IF
488 IF( wantz ) THEN
489 iarow = indxg2p( 1, desca( nb_ ), myrow,
490 $ desca( rsrc_ ), nprow )
491 izrow = indxg2p( 1, desca( nb_ ), myrow,
492 $ descz( rsrc_ ), nprow )
493 IF( iarow.NE.izrow ) THEN
494 info = -19
495 ELSE IF( mod( ia-1, desca( mb_ ) ).NE.
496 $ mod( iz-1, descz( mb_ ) ) ) THEN
497 info = -19
498 ELSE IF( desca( m_ ).NE.descz( m_ ) ) THEN
499 info = -( 2100+m_ )
500 ELSE IF( desca( n_ ).NE.descz( n_ ) ) THEN
501 info = -( 2100+n_ )
502 ELSE IF( desca( mb_ ).NE.descz( mb_ ) ) THEN
503 info = -( 2100+mb_ )
504 ELSE IF( desca( nb_ ).NE.descz( nb_ ) ) THEN
505 info = -( 2100+nb_ )
506 ELSE IF( desca( rsrc_ ).NE.descz( rsrc_ ) ) THEN
507 info = -( 2100+rsrc_ )
508 ELSE IF( desca( csrc_ ).NE.descz( csrc_ ) ) THEN
509 info = -( 2100+csrc_ )
510 ELSE IF( ictxt.NE.descz( ctxt_ ) ) THEN
511 info = -( 2100+ctxt_ )
512 END IF
513 END IF
514 END IF
515 idum2( 1 ) = 1
516 IF( lower ) THEN
517 idum1( 2 ) = ichar( 'L' )
518 ELSE
519 idum1( 2 ) = ichar( 'U' )
520 END IF
521 idum2( 2 ) = 2
522 IF( alleig ) THEN
523 idum1( 3 ) = ichar( 'A' )
524 ELSE IF( indeig ) THEN
525 idum1( 3 ) = ichar( 'I' )
526 ELSE
527 idum1( 3 ) = ichar( 'V' )
528 END IF
529 idum2( 3 ) = 3
530 IF( lquery ) THEN
531 idum1( 4 ) = -1
532 ELSE
533 idum1( 4 ) = 1
534 END IF
535 idum2( 4 ) = 4
536 IF( wantz ) THEN
537 idum1( 1 ) = ichar( 'V' )
538 CALL pchk2mat( n, 4, n, 4, ia, ja, desca, 8, n, 4, n, 4, iz,
539 $ jz, descz, 21, 4, idum1, idum2, info )
540 ELSE
541 idum1( 1 ) = ichar( 'N' )
542 CALL pchk1mat( n, 4, n, 4, ia, ja, desca, 8, 4, idum1,
543 $ idum2, info )
544 END IF
545 work( 1 ) = real( lwopt )
546 iwork( 1 ) = liwmin
547 END IF
548*
549 IF( info.NE.0 ) THEN
550 CALL pxerbla( ictxt, 'PSSYEVR', -info )
551 RETURN
552 ELSE IF( lquery ) THEN
553 RETURN
554 END IF
555
556***********************************************************************
557*
558* Quick return if possible
559*
560***********************************************************************
561 IF( n.EQ.0 ) THEN
562 IF( wantz ) THEN
563 nz = 0
564 END IF
565 m = 0
566 work( 1 ) = real( lwopt )
567 iwork( 1 ) = liwmin
568 RETURN
569 END IF
570
571 IF( valeig ) THEN
572 vll = vl
573 vuu = vu
574 ELSE
575 vll = zero
576 vuu = zero
577 END IF
578*
579* No scaling done here, leave this to MRRR kernel.
580* Scale tridiagonal rather than full matrix.
581*
582***********************************************************************
583*
584* REDUCE SYMMETRIC MATRIX TO TRIDIAGONAL FORM.
585*
586***********************************************************************
587
588
589 CALL pssyntrd( uplo, n, a, ia, ja, desca, work( indd ),
590 $ work( inde ), work( indtau ), work( indwork ),
591 $ llwork, iinfo )
592
593
594 IF (iinfo .NE. 0) THEN
595 CALL pxerbla( ictxt, 'PSSYNTRD', -iinfo )
596 RETURN
597 END IF
598
599***********************************************************************
600*
601* DISTRIBUTE TRIDIAGONAL TO ALL PROCESSORS
602*
603***********************************************************************
604 offset = 0
605 IF( ia.EQ.1 .AND. ja.EQ.1 .AND.
606 $ desca( rsrc_ ).EQ.0 .AND. desca( csrc_ ).EQ.0 )
607 $ THEN
608 CALL pslared1d( n, ia, ja, desca, work( indd ), work( indd2 ),
609 $ work( indwork ), llwork )
610*
611 CALL pslared1d( n, ia, ja, desca, work( inde ), work( inde2 ),
612 $ work( indwork ), llwork )
613 IF( .NOT.lower )
614 $ offset = 1
615 ELSE
616 DO 10 i = 1, n
617 CALL pselget( 'A', ' ', work( indd2+i-1 ), a, i+ia-1,
618 $ i+ja-1, desca )
619 10 CONTINUE
620 IF( lsame( uplo, 'U' ) ) THEN
621 DO 20 i = 1, n - 1
622 CALL pselget( 'A', ' ', work( inde2+i-1 ), a, i+ia-1,
623 $ i+ja, desca )
624 20 CONTINUE
625 ELSE
626 DO 30 i = 1, n - 1
627 CALL pselget( 'A', ' ', work( inde2+i-1 ), a, i+ia,
628 $ i+ja-1, desca )
629 30 CONTINUE
630 END IF
631 END IF
632
633
634
635
636***********************************************************************
637*
638* SET IIL, IIU
639*
640***********************************************************************
641 IF ( alleig ) THEN
642 iil = 1
643 iiu = n
644 ELSE IF ( indeig ) THEN
645 iil = il
646 iiu = iu
647 ELSE IF ( valeig ) THEN
648 CALL slarrc('T', n, vll, vuu, work( indd2 ),
649 $ work( inde2 + offset ), safmin, eigcnt, iil, iiu, info)
650* Refine upper bound N that was taken
651 mz = eigcnt
652 iil = iil + 1
653 ENDIF
654
655 IF(mz.EQ.0) THEN
656 m = 0
657 IF( wantz ) THEN
658 nz = 0
659 END IF
660 work( 1 ) = real( lwopt )
661 iwork( 1 ) = liwmin
662 RETURN
663 END IF
664
665 myil = 0
666 myiu = 0
667 m = 0
668 im = 0
669
670***********************************************************************
671*
672* COMPUTE WORK ASSIGNMENTS
673*
674***********************************************************************
675*
676* Each processor computes the work assignments for all processors
677*
678 CALL pmpim2( iil, iiu, nprocs,
679 $ iwork(indilu), iwork(indilu+nprocs) )
680*
681* Find local work assignment
682*
683 myil = iwork(indilu+myproc)
684 myiu = iwork(indilu+nprocs+myproc)
685
686
687 zoffset = max(0, myil - iil - 1)
688 first = ( myil .EQ. iil )
689
690
691***********************************************************************
692*
693* CALLS TO MRRR KERNEL
694*
695***********************************************************************
696 IF(.NOT.wantz) THEN
697*
698* Compute eigenvalues only.
699*
700 iinfo = 0
701 IF ( myil.GT.0 ) THEN
702 dol = 1
703 dou = myiu - myil + 1
704 CALL sstegr2( jobz, 'I', n, work( indd2 ),
705 $ work( inde2+offset ), vll, vuu, myil, myiu,
706 $ im, w( 1 ), work( indrw ), n,
707 $ myiu - myil + 1,
708 $ iwork( 1 ), work( indwork ), size1,
709 $ iwork( 2*n+1 ), size2,
710 $ dol, dou, zoffset, iinfo )
711* SSTEGR2 zeroes out the entire W array, so we can't just give
712* it the part of W we need. So here we copy the W entries into
713* their correct location
714 DO 49 i = 1, im
715 w( myil-iil+i ) = w( i )
716 49 CONTINUE
717* W( MYIL ) is at W( MYIL - IIL + 1 )
718* W( X ) is at W(X - IIL + 1 )
719 END IF
720 IF (iinfo .NE. 0) THEN
721 CALL pxerbla( ictxt, 'SSTEGR2', -iinfo )
722 RETURN
723 END IF
724 ELSEIF ( wantz .AND. nprocs.EQ.1 ) THEN
725*
726* Compute eigenvalues and -vectors, but only on one processor
727*
728 iinfo = 0
729 IF ( myil.GT.0 ) THEN
730 dol = myil - iil + 1
731 dou = myiu - iil + 1
732 CALL sstegr2( jobz, 'I', n, work( indd2 ),
733 $ work( inde2+offset ), vll, vuu, iil, iiu,
734 $ im, w( 1 ), work( indrw ), n,
735 $ n,
736 $ iwork( 1 ), work( indwork ), size1,
737 $ iwork( 2*n+1 ), size2, dol, dou,
738 $ zoffset, iinfo )
739 ENDIF
740 IF (iinfo .NE. 0) THEN
741 CALL pxerbla( ictxt, 'SSTEGR2', -iinfo )
742 RETURN
743 END IF
744 ELSEIF ( wantz ) THEN
745*
746* Compute representations in parallel.
747* Share eigenvalue computation for root between all processors
748* Then compute the eigenvectors.
749*
750 iinfo = 0
751* Part 1. compute root representations and root eigenvalues
752 IF ( myil.GT.0 ) THEN
753 dol = myil - iil + 1
754 dou = myiu - iil + 1
755 CALL sstegr2a( jobz, 'I', n, work( indd2 ),
756 $ work( inde2+offset ), vll, vuu, iil, iiu,
757 $ im, w( 1 ), work( indrw ), n,
758 $ n, work( indwork ), size1,
759 $ iwork( 2*n+1 ), size2, dol,
760 $ dou, needil, neediu,
761 $ inderr, nsplit, pivmin, scale, wl, wu,
762 $ iinfo )
763 ENDIF
764 IF (iinfo .NE. 0) THEN
765 CALL pxerbla( ictxt, 'SSTEGR2A', -iinfo )
766 RETURN
767 END IF
768*
769* The second part of parallel MRRR, the representation tree
770* construction begins. Upon successful completion, the
771* eigenvectors have been computed. This is indicated by
772* the flag FINISH.
773*
774 vstart = .true.
775 finish = (myil.LE.0)
776C Part 2. Share eigenvalues and uncertainties between all processors
777 iinderr = indwork + inderr - 1
778
779*
780*
781* There are currently two ways to communicate eigenvalue information
782* using the BLACS.
783* 1.) BROADCAST
784* 2.) POINT2POINT between collaborators (those processors working
785* jointly on a cluster.
786* For efficiency, BROADCAST has been disabled.
787* At a later stage, other more efficient communication algorithms
788* might be implemented, e. g. group or tree-based communication.
789*
790 dobcst = .false.
791 IF(dobcst) THEN
792* First gather everything on the first processor.
793* Then use BROADCAST-based communication
794 DO 45 i = 2, nprocs
795 IF (myproc .EQ. (i - 1)) THEN
796 dstrow = 0
797 dstcol = 0
798 starti = dol
799 iwork(1) = starti
800 IF(myil.GT.0) THEN
801 lengthi = myiu - myil + 1
802 ELSE
803 lengthi = 0
804 ENDIF
805 iwork(2) = lengthi
806 CALL igesd2d( ictxt, 2, 1, iwork, 2,
807 $ dstrow, dstcol )
808 IF (( starti.GE.1 ) .AND. ( lengthi.GE.1 )) THEN
809 lengthi2 = 2*lengthi
810* Copy eigenvalues into communication buffer
811 CALL scopy(lengthi,w( starti ),1,
812 $ work( indd ), 1)
813* Copy uncertainties into communication buffer
814 CALL scopy(lengthi,work( iinderr+starti-1 ),1,
815 $ work( indd+lengthi ), 1)
816* send buffer
817 CALL sgesd2d( ictxt, lengthi2,
818 $ 1, work( indd ), lengthi2,
819 $ dstrow, dstcol )
820 END IF
821 ELSE IF (myproc .EQ. 0) THEN
822 srcrow = (i-1) / npcol
823 srccol = mod(i-1, npcol)
824 CALL igerv2d( ictxt, 2, 1, iwork, 2,
825 $ srcrow, srccol )
826 starti = iwork(1)
827 lengthi = iwork(2)
828 IF (( starti.GE.1 ) .AND. ( lengthi.GE.1 )) THEN
829 lengthi2 = 2*lengthi
830* receive buffer
831 CALL sgerv2d( ictxt, lengthi2, 1,
832 $ work(indd), lengthi2, srcrow, srccol )
833* copy eigenvalues from communication buffer
834 CALL scopy( lengthi, work(indd), 1,
835 $ w( starti ), 1)
836* copy uncertainties (errors) from communication buffer
837 CALL scopy(lengthi,work(indd+lengthi),1,
838 $ work( iinderr+starti-1 ), 1)
839 END IF
840 END IF
841 45 CONTINUE
842 lengthi = iiu - iil + 1
843 lengthi2 = lengthi * 2
844 IF (myproc .EQ. 0) THEN
845* Broadcast eigenvalues and errors to all processors
846 CALL scopy(lengthi,w ,1, work( indd ), 1)
847 CALL scopy(lengthi,work( iinderr ),1,
848 $ work( indd+lengthi ), 1)
849 CALL sgebs2d( ictxt, 'A', ' ', lengthi2, 1,
850 $ work(indd), lengthi2 )
851 ELSE
852 srcrow = 0
853 srccol = 0
854 CALL sgebr2d( ictxt, 'A', ' ', lengthi2, 1,
855 $ work(indd), lengthi2, srcrow, srccol )
856 CALL scopy( lengthi, work(indd), 1, w, 1)
857 CALL scopy(lengthi,work(indd+lengthi),1,
858 $ work( iinderr ), 1)
859 END IF
860 ELSE
861*
862* Enable point2point communication between collaborators
863*
864* Find collaborators of MYPROC
865 IF( (nprocs.GT.1).AND.(myil.GT.0) ) THEN
866 CALL pmpcol( myproc, nprocs, iil, needil, neediu,
867 $ iwork(indilu), iwork(indilu+nprocs),
868 $ colbrt, frstcl, lastcl )
869 ELSE
870 colbrt = .false.
871 ENDIF
872
873 IF(colbrt) THEN
874* If the processor collaborates with others,
875* communicate information.
876 DO 47 iproc = frstcl, lastcl
877 IF (myproc .EQ. iproc) THEN
878 starti = dol
879 iwork(1) = starti
880 lengthi = myiu - myil + 1
881 iwork(2) = lengthi
882
883 IF ((starti.GE.1) .AND. (lengthi.GE.1)) THEN
884* Copy eigenvalues into communication buffer
885 CALL scopy(lengthi,w( starti ),1,
886 $ work(indd), 1)
887* Copy uncertainties into communication buffer
888 CALL scopy(lengthi,
889 $ work( iinderr+starti-1 ),1,
890 $ work(indd+lengthi), 1)
891 ENDIF
892
893 DO 46 i = frstcl, lastcl
894 IF(i.EQ.myproc) GOTO 46
895 dstrow = i/ npcol
896 dstcol = mod(i, npcol)
897 CALL igesd2d( ictxt, 2, 1, iwork, 2,
898 $ dstrow, dstcol )
899 IF ((starti.GE.1) .AND. (lengthi.GE.1)) THEN
900 lengthi2 = 2*lengthi
901* send buffer
902 CALL sgesd2d( ictxt, lengthi2,
903 $ 1, work(indd), lengthi2,
904 $ dstrow, dstcol )
905 END IF
906 46 CONTINUE
907 ELSE
908 srcrow = iproc / npcol
909 srccol = mod(iproc, npcol)
910 CALL igerv2d( ictxt, 2, 1, iwork, 2,
911 $ srcrow, srccol )
912 rstarti = iwork(1)
913 rlengthi = iwork(2)
914 IF ((rstarti.GE.1 ) .AND. (rlengthi.GE.1 )) THEN
915 rlengthi2 = 2*rlengthi
916 CALL sgerv2d( ictxt, rlengthi2, 1,
917 $ work(inde), rlengthi2,
918 $ srcrow, srccol )
919* copy eigenvalues from communication buffer
920 CALL scopy( rlengthi, work(inde), 1,
921 $ w( rstarti ), 1)
922* copy uncertainties (errors) from communication buffer
923 CALL scopy(rlengthi,work(inde+rlengthi),1,
924 $ work( iinderr+rstarti-1 ), 1)
925 END IF
926 END IF
927 47 CONTINUE
928 ENDIF
929 ENDIF
930
931*
932* Part 3. Compute representation tree and eigenvectors.
933* What follows is a loop in which the tree
934* is constructed in parallel from top to bottom,
935* on level at a time, until all eigenvectors
936* have been computed.
937*
938 100 CONTINUE
939 IF ( myil.GT.0 ) THEN
940 CALL sstegr2b( jobz, n, work( indd2 ),
941 $ work( inde2+offset ),
942 $ im, w( 1 ), work( indrw ), n, n,
943 $ iwork( 1 ), work( indwork ), size1,
944 $ iwork( 2*n+1 ), size2, dol,
945 $ dou, needil, neediu, indwlc,
946 $ pivmin, scale, wl, wu,
947 $ vstart, finish,
948 $ maxcls, ndepth, parity, zoffset, iinfo )
949 iindwlc = indwork + indwlc - 1
950 IF(.NOT.finish) THEN
951 IF((needil.LT.dol).OR.(neediu.GT.dou)) THEN
952 CALL pmpcol( myproc, nprocs, iil, needil, neediu,
953 $ iwork(indilu), iwork(indilu+nprocs),
954 $ colbrt, frstcl, lastcl )
955 ELSE
956 colbrt = .false.
957 frstcl = myproc
958 lastcl = myproc
959 ENDIF
960*
961* Check if this processor collaborates, i.e.
962* communication is needed.
963*
964 IF(colbrt) THEN
965 DO 147 iproc = frstcl, lastcl
966 IF (myproc .EQ. iproc) THEN
967 starti = dol
968 iwork(1) = starti
969 IF(myil.GT.0) THEN
970 lengthi = myiu - myil + 1
971 ELSE
972 lengthi = 0
973 ENDIF
974 iwork(2) = lengthi
975 IF ((starti.GE.1).AND.(lengthi.GE.1)) THEN
976* Copy eigenvalues into communication buffer
977 CALL scopy(lengthi,
978 $ work( iindwlc+starti-1 ),1,
979 $ work(indd), 1)
980* Copy uncertainties into communication buffer
981 CALL scopy(lengthi,
982 $ work( iinderr+starti-1 ),1,
983 $ work(indd+lengthi), 1)
984 ENDIF
985
986 DO 146 i = frstcl, lastcl
987 IF(i.EQ.myproc) GOTO 146
988 dstrow = i/ npcol
989 dstcol = mod(i, npcol)
990 CALL igesd2d( ictxt, 2, 1, iwork, 2,
991 $ dstrow, dstcol )
992 IF ((starti.GE.1).AND.(lengthi.GE.1)) THEN
993 lengthi2 = 2*lengthi
994* send buffer
995 CALL sgesd2d( ictxt, lengthi2,
996 $ 1, work(indd), lengthi2,
997 $ dstrow, dstcol )
998 END IF
999 146 CONTINUE
1000 ELSE
1001 srcrow = iproc / npcol
1002 srccol = mod(iproc, npcol)
1003 CALL igerv2d( ictxt, 2, 1, iwork, 2,
1004 $ srcrow, srccol )
1005 rstarti = iwork(1)
1006 rlengthi = iwork(2)
1007 IF ((rstarti.GE.1).AND.(rlengthi.GE.1)) THEN
1008 rlengthi2 = 2*rlengthi
1009 CALL sgerv2d( ictxt,rlengthi2, 1,
1010 $ work(inde),rlengthi2,
1011 $ srcrow, srccol )
1012* copy eigenvalues from communication buffer
1013 CALL scopy(rlengthi, work(inde), 1,
1014 $ work( iindwlc+rstarti-1 ), 1)
1015* copy uncertainties (errors) from communication buffer
1016 CALL scopy(rlengthi,work(inde+rlengthi),1,
1017 $ work( iinderr+rstarti-1 ), 1)
1018 END IF
1019 END IF
1020 147 CONTINUE
1021 ENDIF
1022 GOTO 100
1023 ENDIF
1024 ENDIF
1025 IF (iinfo .NE. 0) THEN
1026 CALL pxerbla( ictxt, 'SSTEGR2B', -iinfo )
1027 RETURN
1028 END IF
1029*
1030 ENDIF
1031
1032*
1033***********************************************************************
1034*
1035* MAIN PART ENDS HERE
1036*
1037***********************************************************************
1038*
1039***********************************************************************
1040*
1041* ALLGATHER: EACH PROCESSOR SENDS ITS EIGENVALUES TO THE FIRST ONE,
1042* THEN THE FIRST PROCESSOR BROADCASTS ALL EIGENVALUES
1043*
1044***********************************************************************
1045*
1046 DO 50 i = 2, nprocs
1047 IF (myproc .EQ. (i - 1)) THEN
1048 dstrow = 0
1049 dstcol = 0
1050 starti = myil - iil + 1
1051 iwork(1) = starti
1052 IF(myil.GT.0) THEN
1053 lengthi = myiu - myil + 1
1054 ELSE
1055 lengthi = 0
1056 ENDIF
1057 iwork(2) = lengthi
1058 CALL igesd2d( ictxt, 2, 1, iwork, 2,
1059 $ dstrow, dstcol )
1060 IF ((starti.GE.1).AND.(lengthi.GE.1)) THEN
1061 CALL sgesd2d( ictxt, lengthi,
1062 $ 1, w( starti ), lengthi,
1063 $ dstrow, dstcol )
1064 ENDIF
1065 ELSE IF (myproc .EQ. 0) THEN
1066 srcrow = (i-1) / npcol
1067 srccol = mod(i-1, npcol)
1068 CALL igerv2d( ictxt, 2, 1, iwork, 2,
1069 $ srcrow, srccol )
1070 starti = iwork(1)
1071 lengthi = iwork(2)
1072 IF ((starti.GE.1).AND.(lengthi.GE.1)) THEN
1073 CALL sgerv2d( ictxt, lengthi, 1,
1074 $ w( starti ), lengthi, srcrow, srccol )
1075 ENDIF
1076 ENDIF
1077 50 CONTINUE
1078
1079* Accumulate M from all processors
1080 m = im
1081 CALL igsum2d( ictxt, 'A', ' ', 1, 1, m, 1, -1, -1 )
1082
1083* Broadcast eigenvalues to all processors
1084 IF (myproc .EQ. 0) THEN
1085* Send eigenvalues
1086 CALL sgebs2d( ictxt, 'A', ' ', m, 1, w, m )
1087 ELSE
1088 srcrow = 0
1089 srccol = 0
1090 CALL sgebr2d( ictxt, 'A', ' ', m, 1,
1091 $ w, m, srcrow, srccol )
1092 END IF
1093*
1094* Sort the eigenvalues and keep permutation in IWORK to
1095* sort the eigenvectors accordingly
1096*
1097 DO 160 i = 1, m
1098 iwork( nprocs+1+i ) = i
1099 160 CONTINUE
1100 CALL slasrt2( 'I', m, w, iwork( nprocs+2 ), iinfo )
1101 IF (iinfo.NE.0) THEN
1102 CALL pxerbla( ictxt, 'SLASRT2', -iinfo )
1103 RETURN
1104 END IF
1105
1106***********************************************************************
1107*
1108* TRANSFORM Z FROM 1D WORKSPACE INTO 2D BLOCKCYCLIC STORAGE
1109*
1110***********************************************************************
1111 IF ( wantz ) THEN
1112 DO 170 i = 1, m
1113 iwork( m+nprocs+1+iwork( nprocs+1+i ) ) = i
1114 170 CONTINUE
1115* Store NVS in IWORK(1:NPROCS+1) for PSLAEVSWP
1116 iwork( 1 ) = 0
1117 DO 180 i = 1, nprocs
1118* Find IL and IU for processor i-1
1119* Has already been computed by PMPIM2 and stored
1120 ipil = iwork(indilu+i-1)
1121 ipiu = iwork(indilu+nprocs+i-1)
1122 IF (ipil .EQ. 0) THEN
1123 iwork( i + 1 ) = iwork( i )
1124 ELSE
1125 iwork( i + 1 ) = iwork( i ) + ipiu - ipil + 1
1126 ENDIF
1127 180 CONTINUE
1128
1129 IF ( first ) THEN
1130 CALL pslaevswp(n, work( indrw ), n, z, iz, jz,
1131 $ descz, iwork( 1 ), iwork( nprocs+m+2 ), work( indwork ),
1132 $ indrw - indwork )
1133 ELSE
1134 CALL pslaevswp(n, work( indrw + n ), n, z, iz, jz,
1135 $ descz, iwork( 1 ), iwork( nprocs+m+2 ), work( indwork ),
1136 $ indrw - indwork )
1137 END IF
1138*
1139 nz = m
1140*
1141
1142***********************************************************************
1143*
1144* Compute eigenvectors of A from eigenvectors of T
1145*
1146***********************************************************************
1147 IF( nz.GT.0 ) THEN
1148 CALL psormtr( 'L', uplo, 'N', n, nz, a, ia, ja, desca,
1149 $ work( indtau ), z, iz, jz, descz,
1150 $ work( indwork ), size1, iinfo )
1151 END IF
1152 IF (iinfo.NE.0) THEN
1153 CALL pxerbla( ictxt, 'PSORMTR', -iinfo )
1154 RETURN
1155 END IF
1156*
1157
1158 END IF
1159*
1160 work( 1 ) = real( lwopt )
1161 iwork( 1 ) = liwmin
1162
1163 RETURN
1164*
1165* End of PSSYEVR
1166*
1167 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pmpcol(myproc, nprocs, iil, needil, neediu, pmyils, pmyius, colbrt, frstcl, lastcl)
Definition pmpcol.f:9
subroutine pmpim2(il, iu, nprocs, pmyils, pmyius)
Definition pmpim2.f:7
subroutine pselget(scope, top, alpha, a, ia, ja, desca)
Definition pselget.f:2
subroutine pslaevswp(n, zin, ldzi, z, iz, jz, descz, nvs, key, work, lwork)
Definition pslaevswp.f:5
subroutine pslared1d(n, ia, ja, desc, bycol, byall, work, lwork)
Definition pslared1d.f:2
subroutine psormtr(side, uplo, trans, m, n, a, ia, ja, desca, tau, c, ic, jc, descc, work, lwork, info)
Definition psormtr.f:3
subroutine pssyevr(jobz, range, uplo, n, a, ia, ja, desca, vl, vu, il, iu, m, nz, w, z, iz, jz, descz, work, lwork, iwork, liwork, info)
Definition pssyevr.f:5
subroutine pssyntrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pssyntrd.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine slasrt2(id, n, d, key, info)
Definition slasrt2.f:4
subroutine sstegr2(jobz, range, n, d, e, vl, vu, il, iu, m, w, z, ldz, nzc, isuppz, work, lwork, iwork, liwork, dol, dou, zoffset, info)
Definition sstegr2.f:4
subroutine sstegr2a(jobz, range, n, d, e, vl, vu, il, iu, m, w, z, ldz, nzc, work, lwork, iwork, liwork, dol, dou, needil, neediu, inderr, nsplit, pivmin, scale, wl, wu, info)
Definition sstegr2a.f:6
subroutine sstegr2b(jobz, n, d, e, m, w, z, ldz, nzc, isuppz, work, lwork, iwork, liwork, dol, dou, needil, neediu, indwlc, pivmin, scale, wl, wu, vstart, finish, maxcls, ndepth, parity, zoffset, info)
Definition sstegr2b.f:7