SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
pdsyttrd.f
1 SUBROUTINE pdsyttrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2 $ LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, INFO, JA, LWORK, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20*
21* PDSYTTRD reduces a real symmetric matrix sub( A ) to symmetric
22* tridiagonal form T by an orthogonal similarity transformation:
23* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
24*
25* Notes
26* =====
27*
28* Each global data object is described by an associated description
29* vector. This vector stores the information required to establish
30* the mapping between an object element and its corresponding
31* process and memory location.
32*
33* Let A be a generic term for any 2D block cyclically distributed
34* array.
35* Such a global array has an associated description vector DESCA.
36* In the following comments, the character _ should be read as
37* "of the global array".
38*
39* NOTATION STORED IN EXPLANATION
40* --------------- -------------- -----------------------------------
41* DTYPE_A(global) DESCA( DTYPE_ ) The descriptor type. In this case,
42* DTYPE_A = 1.
43* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
44* indicating the BLACS process grid A is
45* distributed over. The context itself is
46* global, but the handle (the integer
47* value) may vary.
48* M_A (global) DESCA( M_ ) The number of rows in the global
49* array A.
50* N_A (global) DESCA( N_ ) The number of columns in the global
51* array A.
52* MB_A (global) DESCA( MB_ ) The blocking factor used to
53* distribute the rows of the array.
54* NB_A (global) DESCA( NB_ ) The blocking factor used to
55* distribute the columns of the array.
56* RSRC_A (global) DESCA( RSRC_ ) The process row over which the
57* first row of the array A is distributed.
58* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59* first column of the array A is
60* distributed.
61* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62* array. LLD_A >= MAX(1,LOCp(M_A)).
63*
64* Let K be the number of rows or columns of a distributed matrix,
65* and assume that its process grid has dimension p x q.
66* LOCp( K ) denotes the number of elements of K that a process
67* would receive if K were distributed over the p processes of its
68* process column.
69* Similarly, LOCq( K ) denotes the number of elements of K that a
70* process would receive if K were distributed over the q processes
71* of its process row.
72* The values of LOCp() and LOCq() may be determined via a call to
73* the ScaLAPACK tool function, NUMROC:
74* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76* An upper bound for these quantities may be computed by:
77* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79*
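* A caller-side sketch (not part of this routine; LOCPM, LOCQN and
* LLDA are hypothetical names) of sizing the local piece of A from
* its descriptor:
*
* CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
* LOCPM = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW, DESCA( RSRC_ ), NPROW )
* LOCQN = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL, DESCA( CSRC_ ), NPCOL )
* LLDA = MAX( 1, LOCPM )
*
* The local array must then provide at least LLDA * LOCQN elements,
* consistent with LLD_A >= MAX(1,LOCp(M_A)) above.
*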
80* Arguments
81* =========
82*
83* UPLO (global input) CHARACTER
84* Specifies whether the upper or lower triangular part of the
85* symmetric matrix sub( A ) is stored:
86* = 'U': Upper triangular
87* = 'L': Lower triangular
88*
89* N (global input) INTEGER
90* The number of rows and columns to be operated on, i.e. the
91* order of the distributed submatrix sub( A ). N >= 0.
92*
93* A (local input/local output) DOUBLE PRECISION pointer into the
94* local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
95* On entry, this array contains the local pieces of the
96* symmetric distributed matrix sub( A ). If UPLO = 'U', the
97* leading N-by-N upper triangular part of sub( A ) contains
98* the upper triangular part of the matrix, and its strictly
99* lower triangular part is not referenced. If UPLO = 'L', the
100* leading N-by-N lower triangular part of sub( A ) contains the
101* lower triangular part of the matrix, and its strictly upper
102* triangular part is not referenced. On exit, if UPLO = 'U',
103* the diagonal and first superdiagonal of sub( A ) are over-
104* written by the corresponding elements of the tridiagonal
105* matrix T, and the elements above the first superdiagonal,
106* with the array TAU, represent the orthogonal matrix Q as a
107* product of elementary reflectors; if UPLO = 'L', the diagonal
108* and first subdiagonal of sub( A ) are overwritten by the
109* corresponding elements of the tridiagonal matrix T, and the
110* elements below the first subdiagonal, with the array TAU,
111* represent the orthogonal matrix Q as a product of elementary
112* reflectors. See Further Details.
113*
114* IA (global input) INTEGER
115* The row index in the global array A indicating the first
116* row of sub( A ).
117*
118* JA (global input) INTEGER
119* The column index in the global array A indicating the
120* first column of sub( A ).
121*
122* DESCA (global and local input) INTEGER array of dimension DLEN_.
123* The array descriptor for the distributed matrix A.
124*
125* D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
126* The diagonal elements of the tridiagonal matrix T:
127* D(i) = A(i,i). D is tied to the distributed matrix A.
128*
129* E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
130* if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
131* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
132* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
133* distributed matrix A.
134*
135* TAU (local output) DOUBLE PRECISION array, dimension
136* LOCq(JA+N-1). This array contains the scalar factors TAU of
137* the elementary reflectors. TAU is tied to the distributed
138* matrix A.
139*
140* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK)
141* On exit, WORK( 1 ) returns the minimal and optimal LWORK.
142*
143* LWORK (local input) INTEGER
144* The dimension of the array WORK.
145* LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
146* Where:
147* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
148* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PDSYTTRD', 'L', 0, 0,
149* 0, 0 )
150*
151* NUMROC is a ScaLAPACK tool function;
152* PJLAENV is a ScaLAPACK environmental inquiry function;
153* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
154* the subroutine BLACS_GRIDINFO.
155*
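* A caller-side sketch (not part of this routine; ANB, NPS and LWMIN
* are hypothetical names) of computing a sufficient LWORK from the
* formula above:
*
* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
* LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
*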
156* INFO (global output) INTEGER
157* = 0: successful exit
158* < 0: If the i-th argument is an array and the j-th entry
159* had an illegal value, then INFO = -(i*100+j); if the
160* i-th argument is a scalar and had an illegal value,
161* then INFO = -i.
162*
163* Further Details
164* ===============
165*
166* If UPLO = 'U', the matrix Q is represented as a product of
167* elementary reflectors
168*
169* Q = H(n-1) . . . H(2) H(1).
170*
171* Each H(i) has the form
172*
173* H(i) = I - tau * v * v'
174*
175* where tau is a real scalar, and v is a real vector with
176* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
177* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
178*
179* If UPLO = 'L', the matrix Q is represented as a product of
180* elementary reflectors
181*
182* Q = H(1) H(2) . . . H(n-1).
183*
184* Each H(i) has the form
185*
186* H(i) = I - tau * v * v'
187*
188* where tau is a real scalar, and v is a real vector with
189* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
190* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
191*
192* The contents of sub( A ) on exit are illustrated by the following
193* examples with n = 5:
194*
195* if UPLO = 'U': if UPLO = 'L':
196*
197* ( d e v2 v3 v4 ) ( d )
198* ( d e v3 v4 ) ( e d )
199* ( d e v4 ) ( v1 e d )
200* ( d e ) ( v1 v2 e d )
201* ( d ) ( v1 v2 v3 e d )
202*
203* where d and e denote diagonal and off-diagonal elements of T, and
204* vi denotes an element of the vector defining H(i).
205*
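* As a sketch (not part of this routine), applying a single reflector
* H(i) = I - tau * v * v' to a real vector x of length n can be done
* with the Level 1 BLAS routines DDOT and DAXPY; GAMMA is a
* hypothetical temporary:
*
* GAMMA = TAU * DDOT( N, V, 1, X, 1 )
* CALL DAXPY( N, -GAMMA, V, 1, X, 1 )
*
* i.e. x := x - tau * v * ( v' * x ) = H(i) * x.
*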
206* Data storage requirements
207* =========================
208*
209* PDSYTTRD is not intended to be called directly. All users are
210* encouraged to call PDSYTRD, which will then call PDSYTTRD if
211* appropriate. A must be in cyclic format (i.e. MB = NB = 1),
212* the process grid must be square ( i.e. NPROW = NPCOL ) and
213* only lower triangular storage is supported.
214*
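* A caller-side sketch (not part of this routine) of a descriptor that
* meets these requirements; ICTXT is assumed to hold a square
* NPROW = NPCOL process grid and LLDA is a hypothetical local leading
* dimension:
*
* CALL DESCINIT( DESCA, N, N, 1, 1, 0, 0, ICTXT, LLDA, INFO )
*
* i.e. MB = NB = 1 (cyclic format) with RSRC = CSRC = 0, which is what
* the argument checks in the code below enforce.
*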
215* Local variables
216* ===============
217*
218* PDSYTTRD uses five local arrays:
219* WORK ( InV ) dimension ( NP, ANB+1): array V
220* WORK ( InH ) dimension ( NP, ANB+1): array H
221* WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
222* WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
223* WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
224*
225* Arrays V and H are replicated across all processor columns.
226* Arrays V^T and H^T are replicated across all processor rows.
227*
228* WORK ( InVT ), or V^T, is stored as a tall skinny
229* array ( NQ x ANB-1 ) for efficiency. Since only the lower
230* triangular portion of A is updated, Av is computed as:
231* tril(A) * v + v^T * tril(A,-1). This is performed as
232* two local triangular matrix-vector multiplications (both in
233* MVR2) followed by a transpose and a sum across the columns.
234* In the local computation, WORK( InVT ) is used to compute
235* tril(A) * v and WORK( InV ) is used to compute
236* v^T * tril(A,-1)
237*
238* The following variables are global indices into A:
239* INDEX: The current global row and column number.
240* MAXINDEX: The global row and column for the first row and
241* column in the trailing block of A.
242* LIIB, LIJB: The first local row, column in the trailing block of A.
243*
244* The following variables point into the arrays A, V, H, V^T, H^T:
245* BINDEX = INDEX - MININDEX: The column index in V, H, V^T, H^T.
246* LII: local index I: The local row number for row INDEX
247* LIJ: local index J: The local column number for column INDEX
248* LIIP1: local index I+1: The local row number for row INDEX+1
249* LIJP1: local index J+1: The local col number for col INDEX+1
250* LTLI: lower triangular local index I: The local row for the
251* upper left entry in tril( A(INDEX, INDEX) )
252* LTLIP1: lower triangular local index I+1: The local row for the
253* upper left entry in tril( A(INDEX+1, INDEX+1) )
254*
255* Details: The distinction between LII and LTLI (and between
256* LIIP1 and LTLIP1) is subtle. Within the current processor
257* column (i.e. MYCOL .eq. CURCOL) they are the same. However,
258* on some processors, A( LII, LIJ ) points to an element
259* above the diagonal; on these processors, LTLI = LII+1.
260*
261* The following variables give the number of rows and/or columns
262* in various matrices:
263* NP: The number of local rows in A( 1:N, 1:N )
264* NQ: The number of local columns in A( 1:N, 1:N )
265* NPM0: The number of local rows in A( INDEX:N, INDEX:N )
266* NQM0: The number of local columns in A( INDEX:N, INDEX:N )
267* NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
268* NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
269* LTNM0: The number of local rows & columns in
270* tril( A( INDEX:N, INDEX:N ) )
271* LTNM1: The number of local rows & columns in
272* tril( A( INDEX+1:N, INDEX+1:N ) )
273* NOTE: LTNM0 == LTNM1 on all processors except the diagonal
274* processors, i.e. those where MYCOL == MYROW.
275*
276* Invariants:
277* NP = NPM0 + LII - 1
278* NQ = NQM0 + LIJ - 1
279* NP = NPM1 + LIIP1 - 1
280* NQ = NQM1 + LIJP1 - 1
281* NP = LTLI + LTNM0 - 1
282* NP = LTLIP1 + LTNM1 - 1
283*
284* Temporary variables. The following variables are used within
285* a few lines after they are set and do not hold state from one
286* loop iteration to the next:
287*
288* The matrix A:
289* The matrix A does not hold the same values that it would
290* in an unblocked code nor the values that it would hold
291* in a blocked code.
292*
293* The value of A is confusing. It is easiest to state the
294* difference between trueA and A at the point that MVR2 is called,
295* so we will start there.
296*
297* Let trueA be the value that A would
298* have at a given point in an unblocked code and A
299* be the value that A has in this code at the same point.
300*
301* At the time of the call to MVR2,
302* trueA = A + V' * H + H' * V
303* where H = H( MAXINDEX:N, 1:BINDEX ) and
304* V = V( MAXINDEX:N, 1:BINDEX ).
305*
306* At the bottom of the inner loop,
307* trueA = A + V' * H + H' * V + v' * h + h' * v
308* where H = H( MAXINDEX:N, 1:BINDEX ) and
309* V = V( MAXINDEX:N, 1:BINDEX ) and
310* v = V( liip1:N, BINDEX+1 ) and
311* h = H( liip1:N, BINDEX+1 )
312*
313* At the top of the loop, BINDEX gets incremented, hence:
314* trueA = A + V' * H + H' * V + v' * h + h' * v
315* where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
316* V = V( MAXINDEX:N, 1:BINDEX-1 ) and
317* v = V( liip1:N, BINDEX ) and
318* h = H( liip1:N, BINDEX )
319*
320*
321* A gets updated at the bottom of the outer loop.
322* After this update, trueA = A + v' * h + h' * v
323* where v = V( liip1:N, BINDEX ) and
324* h = H( liip1:N, BINDEX ) and BINDEX = 0
325* Indeed, the previous loop invariant as stated above for the
326* top of the loop still holds, but with BINDEX = 0, H and V
327* are null matrices.
328*
329* After the current column of A is updated,
330* trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
331* the rest of A is untouched.
332*
333* After the current block column of A is updated,
334* trueA = A + V' * H + H' * V
335* where H = H( MAXINDEX:N, 1:BINDEX ) and
336* V = V( MAXINDEX:N, 1:BINDEX )
337*
338* This brings us back to the point at which MVR2 is called.
339*
340*
341* Details of the parallelization:
342*
343* We delay spreading v across to all processor columns (which
344* would naturally happen at the bottom of the loop) in order to
345* combine the spread of v( : , i-1 ) with the spread of h( : , i )
346*
347* In order to compute h( :, i ), we must update A( :, i )
348* which means that the processor column owning A( :, i ) must
349* have: c, tau, v( i, i ) and h( i, i ).
350*
351* The traditional
352* way of computing v (and the one used in pzlatrd.f and
353* zlatrd.f) is:
354* v = tau * v
355* c = v' * h
356* alpha = - tau * c / 2
357* v = v + alpha * h
358* However, the traditional way of computing v requires that tau
359* be broadcast to all processors in the current column (to compute
360* v = tau * v) and then a sum-to-all is required (to
361* compute v' * h ). We use the following formula instead:
362* c = v' * h
363* v = tau * ( v - c * tau' * h / 2 )
364* The above formula allows tau to be spread down in the
365* same call to DGSUM2D which performs the sum-to-all of c.
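*
* The two formulations agree: the arithmetic here is real, so
* tau' = tau, and writing v0 for v on entry,
* traditional: v = tau * v0
* c = v' * h = tau * ( v0' * h )
* v = v - ( tau * c / 2 ) * h
* = tau * v0 - tau * tau * ( v0' * h ) / 2 * h
* this code: c = v0' * h
* v = tau * ( v0 - c * tau * h / 2 )
* = tau * v0 - tau * tau * ( v0' * h ) / 2 * h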
366*
367* The computation of v, which could be performed in any processor
368* column (or other processor subsets), is performed in the
369* processor column that owns A( :, i+1 ) so that A( :, i+1 )
370* can be updated prior to spreading v across.
371*
372* We keep the block column of A up-to-date to minimize the
373* work required in updating the current column of A. Updating
374* the block column of A is reasonably load balanced whereas
375* updating the current column of A is not (only the current
376* processor column is involved).
377*
378* In the following overview of the steps performed, M in the
379* margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
380* or more flops per processor.
381*
382* Inner loop:
383* A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
384*M h = house( A(index:n, index) )
385*M Spread v, h across
386*M vt = v^T; ht = h^T
387* A( index+1:n, index+1:maxindex ) -=
388* ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
389*C v = tril(A) * h; vt = ht * tril(A,-1)
390*MorC v = v - H*V*h - V*H*h
391*M v = v + vt^T
392*M c = v' * h
393* v = tau * ( v - c * tau' * h / 2 )
394*C A = A - H*V - V*H
395*
396*
397*
398* =================================================================
399*
400* .. Parameters ..
401 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
402 $ mb_, nb_, rsrc_, csrc_, lld_
403 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
404 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
405 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
406 DOUBLE PRECISION ONE
407 parameter( one = 1.0d0 )
408 DOUBLE PRECISION Z_ONE, Z_NEGONE, Z_ZERO
409 parameter( z_one = 1.0d0, z_negone = -1.0d0,
410 $ z_zero = 0.0d0 )
411 DOUBLE PRECISION ZERO
412 parameter( zero = 0.0d+0 )
413* ..
414*
415*
416* .. Local Scalars ..
417*
418*
419 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
420 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
421 $ indexa, indexinh, indexinv, inh, inhb, inht,
422 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
423 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
424 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
425 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
426 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
427 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
428 $ pbmin, pbsize, pnb, rowsperproc
429 DOUBLE PRECISION ALPHA, BETA, C, CONJTOPH, CONJTOPV, NORM,
430 $ oneoverbeta, safmax, safmin, toph, topnv,
431 $ toptau, topv
432* ..
433* .. Local Arrays ..
434*
435*
436*
437*
438 INTEGER IDUM1( 1 ), IDUM2( 1 )
439 DOUBLE PRECISION CC( 3 ), DTMP( 5 )
440* ..
441* .. External Subroutines ..
442 EXTERNAL blacs_gridinfo, chk1mat, dcombnrm2, dgebr2d,
443 $ dgebs2d, dgemm, dgemv, dgerv2d, dgesd2d,
444 $ dgsum2d, dlamov, dscal, dtrmvt, pchk1mat,
445 $ pdtreecomb, pxerbla
446* ..
447* .. External Functions ..
448*
449 LOGICAL LSAME
450 INTEGER ICEIL, NUMROC, PJLAENV
451 DOUBLE PRECISION DNRM2, PDLAMCH
452 EXTERNAL lsame, iceil, numroc, pjlaenv, dnrm2, pdlamch
453* ..
454* .. Intrinsic Functions ..
455 INTRINSIC dble, ichar, max, min, mod, sign, sqrt
456* ..
457*
458*
459* .. Executable Statements ..
460* This is just to keep ftnchek and toolpack/1 happy
461 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
462 $ rsrc_.LT.0 )RETURN
463*
464*
465*
466* Further details
467* ===============
468*
469* At the top of the loop, v and nh have been computed but not
470* spread across. Hence, A is out-of-date even after the
471* rank 2k update. Furthermore, we compute the next v before
472* nh is spread across.
473*
474* I claim that if we used a sum-to-all on NV, by summing CC within
475* each column, we could compute NV locally and could avoid
476* spreading V across. Bruce claims that sum-to-all can be made
477* to cost no more than sum-to-one on the Paragon. If that is
478* true, this would be a win. But,
479* the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
480* and hence the present scheme is better for now.
481*
482* Get grid parameters
483*
484 ictxt = desca( ctxt_ )
485 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
486*
487 safmax = sqrt( pdlamch( ictxt, 'O' ) ) / n
488 safmin = sqrt( pdlamch( ictxt, 'S' ) )
489*
490* Test the input parameters
491*
492 info = 0
493 IF( nprow.EQ.-1 ) THEN
494 info = -( 600+ctxt_ )
495 ELSE
496*
497* Here we set execution options for PDSYTTRD
498*
499 pnb = pjlaenv( ictxt, 2, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
500 anb = pjlaenv( ictxt, 3, 'PDSYTTRD', 'L', 0, 0, 0, 0 )
501*
502 interleave = ( pjlaenv( ictxt, 4, 'PDSYTTRD', 'L', 1, 0, 0,
503 $ 0 ).EQ.1 )
504 twogemms = ( pjlaenv( ictxt, 4, 'PDSYTTRD', 'L', 2, 0, 0,
505 $ 0 ).EQ.1 )
506 balanced = ( pjlaenv( ictxt, 4, 'PDSYTTRD', 'L', 3, 0, 0,
507 $ 0 ).EQ.1 )
508*
509 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
510*
511*
512 upper = lsame( uplo, 'U' )
513 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
514 $ info = 600 + nb_
515 IF( info.EQ.0 ) THEN
516*
517*
518* Here is the arithmetic:
519* Let maxnpq = max( np, nq, 2 * ANB )
520* LDV = 4 * max( np, nq ) + 2
521* LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
522* = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
523*
524* This overestimates memory requirements when ANB > NP/2
525* Memory requirements are lower when interleave = .false.
526* Hence, we could have two sets of memory requirements,
527* one for interleave and one for non-interleave.
528*
529*
530 nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
531 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
532*
533 work( 1 ) = dble( lwmin )
534 IF( .NOT.lsame( uplo, 'L' ) ) THEN
535 info = -1
536 ELSE IF( ia.NE.1 ) THEN
537 info = -4
538 ELSE IF( ja.NE.1 ) THEN
539 info = -5
540 ELSE IF( nprow.NE.npcol ) THEN
541 info = -( 600+ctxt_ )
542 ELSE IF( desca( dtype_ ).NE.1 ) THEN
543 info = -( 600+dtype_ )
544 ELSE IF( desca( mb_ ).NE.1 ) THEN
545 info = -( 600+mb_ )
546 ELSE IF( desca( nb_ ).NE.1 ) THEN
547 info = -( 600+nb_ )
548 ELSE IF( desca( rsrc_ ).NE.0 ) THEN
549 info = -( 600+rsrc_ )
550 ELSE IF( desca( csrc_ ).NE.0 ) THEN
551 info = -( 600+csrc_ )
552 ELSE IF( lwork.LT.lwmin ) THEN
553 info = -11
554 END IF
555 END IF
556 IF( upper ) THEN
557 idum1( 1 ) = ichar( 'U' )
558 ELSE
559 idum1( 1 ) = ichar( 'L' )
560 END IF
561 idum2( 1 ) = 1
562*
563 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
564 $ info )
565 END IF
566*
567 IF( info.NE.0 ) THEN
568 CALL pxerbla( ictxt, 'PDSYTTRD', -info )
569 RETURN
570 END IF
571*
572* Quick return if possible
573*
574 IF( n.EQ.0 )
575 $ RETURN
576*
577*
578*
579* Reduce the lower triangle of sub( A )
580 np = numroc( n, 1, myrow, 0, nprow )
581 nq = numroc( n, 1, mycol, 0, npcol )
582*
583 nxtrow = 0
584 nxtcol = 0
585*
586 liip1 = 1
587 lijp1 = 1
588 npm1 = np
589 nqm1 = nq
590*
591 lda = desca( lld_ )
592 ictxt = desca( ctxt_ )
593*
594*
595*
596* Miscellaneous details:
597* Put tau, D and E in the right places
598* Check signs
599* Place all the arrays in WORK, control their placement
600* in memory.
601*
602*
603*
604* Loop invariants
605* A(LIIP1, LIJ) points to the first element of A(I+1,J)
606* NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
607* A(LII:N,LIJ:N) is one step out of date.
608* proc( CURROW, CURCOL ) owns A(LII,LIJ)
609* proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
610*
611 inh = 1
612*
613 IF( interleave ) THEN
614*
615* H and V are interleaved to minimize memory movement
616* LDV has to be twice as large to accommodate interleaving.
617* In addition, LDV is doubled again to allow v, h and
618* toptau to be spread across and transposed in a
619* single communication operation with minimum memory
620* movement.
621*
622* We could reduce LDV back to 2*MAX(NPM1,NQM1)
623* by increasing the memory movement required in
624* the spread and transpose of v, h and toptau.
625* However, since the non-interleaved path already
626* provides a near minimum memory requirement option,
627* we did not provide this additional path.
628*
629 ldv = 4*( max( npm1, nqm1 ) ) + 2
630*
631 inh = 1
632*
633 inv = inh + ldv / 2
634 invt = inh + ( anb+1 )*ldv
635*
636 inht = invt + ldv / 2
637 intmp = invt + ldv*( anb+1 )
638*
639 ELSE
640 ldv = max( npm1, nqm1 )
641*
642 inht = inh + ldv*( anb+1 )
643 inv = inht + ldv*( anb+1 )
644*
645* The code works without this +1, but only because of a
646* coincidence. Without the +1, WORK(INVT) gets trashed, but
647* WORK(INVT) is only used once and when it is used, it is
648* multiplied by WORK( INH ) which is zero. Hence, the fact
649* that WORK(INVT) is trashed has no effect.
650*
651 invt = inv + ldv*( anb+1 ) + 1
652 intmp = invt + ldv*( 2*anb )
653*
654 END IF
655*
656 IF( info.NE.0 ) THEN
657 CALL pxerbla( ictxt, 'PDSYTTRD', -info )
658 work( 1 ) = dble( lwmin )
659 RETURN
660 END IF
661*
662*
663* The following satisfies the loop invariant: trueA = A - V * HT - H * VT,
664* (where V, H, VT and HT all have BINDEX+1 rows/columns)
665* the first ANB times through the loop.
666*
667*
668*
669* Setting either ( InH and InHT ) or InV to Z_ZERO
670* is adequate except in the face of NaNs.
671*
672*
673 DO 10 i = 1, np
674 work( inh+i-1 ) = z_zero
675 work( inv+i-1 ) = z_zero
676 10 CONTINUE
677 DO 20 i = 1, nq
678 work( inht+i-1 ) = z_zero
679 20 CONTINUE
680*
681*
682*
683 topnv = z_zero
684*
685 ltlip1 = lijp1
686 ltnm1 = npm1
687 IF( mycol.GT.myrow ) THEN
688 ltlip1 = ltlip1 + 1
689 ltnm1 = ltnm1 - 1
690 END IF
691*
692*
693 DO 210 minindex = 1, n - 1, anb
694*
695*
696 maxindex = min( minindex+anb-1, n )
697 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
698 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
699*
700 nqb = nq - lijb + 1
701 npb = np - liib + 1
702 inhtb = inht + lijb - 1
703 invtb = invt + lijb - 1
704 inhb = inh + liib - 1
705 invb = inv + liib - 1
706*
707*
708*
709*
710 DO 160 index = minindex, min( maxindex, n-1 )
711*
712 bindex = index - minindex
713*
714 currow = nxtrow
715 curcol = nxtcol
716*
717 nxtrow = mod( currow+1, nprow )
718 nxtcol = mod( curcol+1, npcol )
719*
720 lii = liip1
721 lij = lijp1
722 npm0 = npm1
723*
724 IF( myrow.EQ.currow ) THEN
725 npm1 = npm1 - 1
726 liip1 = liip1 + 1
727 END IF
728 IF( mycol.EQ.curcol ) THEN
729 nqm1 = nqm1 - 1
730 lijp1 = lijp1 + 1
731 ltlip1 = ltlip1 + 1
732 ltnm1 = ltnm1 - 1
733 END IF
734*
735*
736*
737*
738* V = NV, VT = NVT, H = NH, HT = NHT
739*
740*
741* Update the current column of A
742*
743*
744 IF( mycol.EQ.curcol ) THEN
745*
746 indexa = lii + ( lij-1 )*lda
747 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
748 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
749 conjtoph = work( inht+lij-1+bindex*ldv )
750 conjtopv = topnv
751*
752 IF( index.GT.1 ) THEN
753 DO 30 i = 0, npm0 - 1
754* A( INDEXA+I ) = A( INDEXA+I )
755 a( indexa+i ) = a( indexa+i ) -
756 $ work( indexinv+ldv+i )*conjtoph -
757 $ work( indexinh+ldv+i )*conjtopv
758 30 CONTINUE
759 END IF
760*
761*
762 END IF
763*
764*
765 IF( mycol.EQ.curcol ) THEN
766*
767* Compute the householder vector
768*
769 IF( myrow.EQ.currow ) THEN
770 dtmp( 2 ) = a( lii+( lij-1 )*lda )
771 ELSE
772 dtmp( 2 ) = zero
773 END IF
774 IF( myrow.EQ.nxtrow ) THEN
775 dtmp( 3 ) = a( liip1+( lij-1 )*lda )
776 dtmp( 4 ) = zero
777 ELSE
778 dtmp( 3 ) = zero
779 dtmp( 4 ) = zero
780 END IF
781*
782 norm = dnrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
783 dtmp( 1 ) = norm
784*
785* IF DTMP(5) = 1.0, NORM is too large and might cause
786* overflow, hence PDTREECOMB must be called. IF DTMP(5)
787* is zero on output, DTMP(1) can be trusted.
788*
789 dtmp( 5 ) = zero
790 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin ) THEN
791 dtmp( 5 ) = one
792 dtmp( 1 ) = zero
793 END IF
794*
795 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
796 CALL dgsum2d( ictxt, 'C', ' ', 5, 1, dtmp, 5, -1,
797 $ curcol )
798 IF( dtmp( 5 ).EQ.zero ) THEN
799 dtmp( 1 ) = sqrt( dtmp( 1 ) )
800 ELSE
801 dtmp( 1 ) = norm
802 CALL pdtreecomb( ictxt, 'C', 1, dtmp, -1, mycol,
803 $ dcombnrm2 )
804 END IF
805*
806 norm = dtmp( 1 )
807*
808 d( lij ) = dtmp( 2 )
809 IF( myrow.EQ.currow .AND. mycol.EQ.curcol ) THEN
810 a( lii+( lij-1 )*lda ) = d( lij )
811 END IF
812*
813*
814 alpha = dtmp( 3 )
815*
816 norm = sign( norm, alpha )
817*
818 IF( norm.EQ.zero ) THEN
819 toptau = zero
820 ELSE
821 beta = norm + alpha
822 toptau = beta / norm
823 oneoverbeta = 1.0d0 / beta
824*
825 CALL dscal( npm1, oneoverbeta,
826 $ a( liip1+( lij-1 )*lda ), 1 )
827 END IF
828*
829 IF( myrow.EQ.nxtrow ) THEN
830 a( liip1+( lij-1 )*lda ) = z_one
831 END IF
832*
833 tau( lij ) = toptau
834 e( lij ) = -norm
835*
836 END IF
837*
838*
839* Spread v, nh, toptau across
840*
841 DO 40 i = 0, npm1 - 1
842 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
843 $ ( lij-1 )*lda )
844 40 CONTINUE
845*
846 IF( mycol.EQ.curcol ) THEN
847 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
848 CALL dgebs2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
849 $ work( inv+liip1-1+bindex*ldv ),
850 $ npm1+npm1+1 )
851 ELSE
852 CALL dgebr2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
853 $ work( inv+liip1-1+bindex*ldv ),
854 $ npm1+npm1+1, myrow, curcol )
855 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
856 END IF
857 DO 50 i = 0, npm1 - 1
858 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
859 $ 1+bindex*ldv+npm1+i )
860 50 CONTINUE
861*
862 IF( index.LT.n ) THEN
863 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
864 $ a( liip1+( lij-1 )*lda ) = e( lij )
865 END IF
866*
867* Transpose v, nh
868*
869*
870 IF( myrow.EQ.mycol ) THEN
871 DO 60 i = 0, npm1 + npm1
872 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
873 $ bindex*ldv+i )
874 60 CONTINUE
875 ELSE
876 CALL dgesd2d( ictxt, npm1+npm1, 1,
877 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
878 $ mycol, myrow )
879 CALL dgerv2d( ictxt, nqm1+nqm1, 1,
880 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
881 $ mycol, myrow )
882 END IF
883*
884 DO 70 i = 0, nqm1 - 1
885 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
886 $ lijp1-1+bindex*ldv+nqm1+i )
887 70 CONTINUE
888*
889*
890* Update the current block column of A
891*
892 IF( index.GT.1 ) THEN
893 DO 90 j = lijp1, lijb - 1
894 DO 80 i = 0, npm1 - 1
895*
896 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
897 $ - work( inv+liip1-1+bindex*ldv+i )*
898 $ work( inht+j-1+bindex*ldv ) -
899 $ work( inh+liip1-1+bindex*ldv+i )*
900 $ work( invt+j-1+bindex*ldv )
901 80 CONTINUE
902 90 CONTINUE
903 END IF
904*
905*
906*
907* Compute NV = A * NHT; NVT = A * NH
908*
909* These two lines are necessary because these elements
910* are not always involved in the calls to DTRMVT
911* for two reasons:
912* 1) On diagonal processors, the call to DTRMVT
913* involves only LTNM1-1 elements
914* 2) On some processes, NQM1 < LTNM1 or LIIP1 < LTLIP1
915* and when the results are combined across all processes,
916* uninitialized values may be included.
917 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
918 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
919*
920*
921 IF( myrow.EQ.mycol ) THEN
922 IF( ltnm1.GT.1 ) THEN
923 CALL dtrmvt( 'L', ltnm1-1,
924 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
925 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
926 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
927 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
928 $ ldv ), 1, work( inht+lijp1-1+( bindex+
929 $ 1 )*ldv ), 1 )
930 END IF
931 DO 100 i = 1, ltnm1
932 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
933 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
934 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
935 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
936 100 CONTINUE
937 ELSE
938 IF( ltnm1.GT.0 )
939 $ CALL dtrmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
940 $ lda, work( invt+lijp1-1+( bindex+1 )*
941 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
942 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
943 $ ( bindex+1 )*ldv ), 1,
944 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
945 $ 1 )
946*
947 END IF
948*
949*
950* We take advantage of the fact that:
951* A * sum( B ) = sum ( A * B ) for matrices A,B
952*
953* trueA = A + V * HT + H * VT
954* hence: (trueA)v = Av + V * HT * v + H * VT * v
955* VT * v = sum_p_in_NPROW ( VTp * v )
956* H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
957*
958* v = v + V * HT * h + H * VT * h
959*
960*
961*
962* tmp = HT * nh1
963 DO 110 i = 1, 2*( bindex+1 )
964 work( intmp-1+i ) = 0
965 110 CONTINUE
966*
967 IF( balanced ) THEN
968 npset = nprow
969 mysetnum = myrow
970 rowsperproc = iceil( nqb, npset )
971 myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
972 numrows = min( rowsperproc, nqb-myfirstrow+1 )
973*
974*
975* tmp = HT * v
976*
977 CALL dgemv( 'C', numrows, bindex+1, z_one,
978 $ work( inhtb+myfirstrow-1 ), ldv,
979 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
980 $ 1, z_zero, work( intmp ), 1 )
981* tmp2 = VT * v
982 CALL dgemv( 'C', numrows, bindex+1, z_one,
983 $ work( invtb+myfirstrow-1 ), ldv,
984 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
985 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
986*
987*
988 CALL dgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
989 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
990 ELSE
991* tmp = HT * v
992*
993 CALL dgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
994 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
995 $ z_zero, work( intmp ), 1 )
996* tmp2 = VT * v
997 CALL dgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
998 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
999 $ z_zero, work( intmp+bindex+1 ), 1 )
1000*
1001 END IF
1002*
1003*
1004*
1005 IF( balanced ) THEN
1006 mysetnum = mycol
1007*
1008 rowsperproc = iceil( npb, npset )
1009 myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1010 numrows = min( rowsperproc, npb-myfirstrow+1 )
1011*
1012 CALL dgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1013 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1014*
1015*
1016* v = v + V * tmp
1017 IF( index.GT.1 ) THEN
1018 CALL dgemv( 'N', numrows, bindex+1, z_negone,
1019 $ work( invb+myfirstrow-1 ), ldv,
1020 $ work( intmp ), 1, z_one,
1021 $ work( invb+myfirstrow-1+( bindex+1 )*
1022 $ ldv ), 1 )
1023*
1024* v = v + H * tmp2
1025 CALL dgemv( 'N', numrows, bindex+1, z_negone,
1026 $ work( inhb+myfirstrow-1 ), ldv,
1027 $ work( intmp+bindex+1 ), 1, z_one,
1028 $ work( invb+myfirstrow-1+( bindex+1 )*
1029 $ ldv ), 1 )
1030 END IF
1031*
1032 ELSE
1033* v = v + V * tmp
1034 CALL dgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1035 $ ldv, work( intmp ), 1, z_one,
1036 $ work( invb+( bindex+1 )*ldv ), 1 )
1037*
1038*
1039* v = v + H * tmp2
1040 CALL dgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1041 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1042 $ work( invb+( bindex+1 )*ldv ), 1 )
1043*
1044 END IF
1045*
1046*
1047* Transpose NV and add it back into NVT
1048*
1049 IF( myrow.EQ.mycol ) THEN
1050 DO 120 i = 0, nqm1 - 1
1051 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1052 $ i )
1053 120 CONTINUE
1054 ELSE
1055 CALL dgesd2d( ictxt, nqm1, 1,
1056 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1057 $ nqm1, mycol, myrow )
1058 CALL dgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1059 $ myrow )
1060*
1061 END IF
1062 DO 130 i = 0, npm1 - 1
1063 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1064 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1065 130 CONTINUE
1066*
1067* Sum-to-one NV rowwise (within a row)
1068*
1069 CALL dgsum2d( ictxt, 'R', ' ', npm1, 1,
1070 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1071 $ myrow, nxtcol )
1072*
1073*
1074* Dot product c = NV * NH
1075* Sum-to-all c within next processor column
1076*
1077*
1078 IF( mycol.EQ.nxtcol ) THEN
1079 cc( 1 ) = z_zero
1080 DO 140 i = 0, npm1 - 1
1081 cc( 1 ) = cc( 1 ) + work( inv+liip1-1+( bindex+1 )*
1082 $ ldv+i )*work( inh+liip1-1+( bindex+1 )*ldv+
1083 $ i )
1084 140 CONTINUE
1085 IF( myrow.EQ.nxtrow ) THEN
1086 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1087 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1088 ELSE
1089 cc( 2 ) = z_zero
1090 cc( 3 ) = z_zero
1091 END IF
1092 CALL dgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1093*
1094 topv = cc( 2 )
1095 c = cc( 1 )
1096 toph = cc( 3 )
1097*
1098 topnv = toptau*( topv-c*toptau / 2*toph )
1099*
1100*
1101* Compute V = Tau * (V - C * Tau' / 2 * H )
1102*
1103*
1104 DO 150 i = 0, npm1 - 1
1105 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1106 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*toptau /
1107 $ 2*work( inh+liip1-1+( bindex+1 )*ldv+i ) )
1108 150 CONTINUE
1109*
1110 END IF
1111*
1112*
1113 160 CONTINUE
1114*
1115*
1116* Perform the rank2k update
1117*
1118 IF( maxindex.LT.n ) THEN
1119*
1120 DO 170 i = 0, npm1 - 1
1121 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1122 170 CONTINUE
1123*
1124*
1125*
1126 IF( .NOT.twogemms ) THEN
1127 IF( interleave ) THEN
1128 ldzg = ldv / 2
1129 ELSE
1130 CALL dlamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1131 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1132*
1133 CALL dlamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1134 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1135 ldzg = ldv
1136 END IF
1137 nbzg = anb*2
1138 ELSE
1139 ldzg = ldv
1140 nbzg = anb
1141 END IF
1142*
1143*
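* The trailing matrix is updated in panels of PNB rows:
* A := A - H * VT' - V * HT'. When TWOGEMMS is .FALSE., V has
* been packed next to H and HT next to VT (or the interleaved
* layout already alternates their columns), so the single DGEMM
* below, with inner dimension NBZG = 2*ANB, applies both terms;
* otherwise the second DGEMM applies the V * HT' term separately.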
1144 DO 180 pbmin = 1, ltnm1, pnb
1145*
1146 pbsize = min( pnb, ltnm1-pbmin+1 )
1147 pbmax = min( ltnm1, pbmin+pnb-1 )
1148 CALL dgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1149 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1150 $ work( invt+lijp1-1 ), ldzg, z_one,
1151 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1152 IF( twogemms ) THEN
1153 CALL dgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1154 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1155 $ work( inht+lijp1-1 ), ldzg, z_one,
1156 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1157 END IF
1158 180 CONTINUE
1159*
1160*
1161*
1162 DO 190 i = 0, npm1 - 1
1163 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1164 work( inh+liip1-1+i ) = work( intmp+i )
1165 190 CONTINUE
1166 DO 200 i = 0, nqm1 - 1
1167 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1168 200 CONTINUE
1169*
1170*
1171 END IF
1172*
1173* End of the update A code
1174*
1175 210 CONTINUE
1176*
1177 IF( mycol.EQ.nxtcol ) THEN
1178 IF( myrow.EQ.nxtrow ) THEN
1179*
1180 d( nq ) = a( np+( nq-1 )*lda )
1181*
1182 CALL dgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1183 ELSE
1184 CALL dgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1185 $ nxtcol )
1186 END IF
1187 END IF
1188*
1189*
1190*
1191*
1192 work( 1 ) = dble( lwmin )
1193 RETURN
1194*
1195* End of PDSYTTRD
1196*
1197*
1198 END