pchettrd.f
1 SUBROUTINE pchettrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2 $ LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, INFO, JA, LWORK, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL D( * ), E( * )
15 COMPLEX A( * ), TAU( * ), WORK( * )
16* ..
17*
 18* Purpose
 19* =======
 20*
21*
22* PCHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
 23* tridiagonal form T by a unitary similarity transformation:
24* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25*
26* Notes
27* =====
28*
29* Each global data object is described by an associated description
30* vector. This vector stores the information required to establish
31* the mapping between an object element and its corresponding
32* process and memory location.
33*
 34* Let A be a generic term for any 2D block cyclically distributed
35* array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- -----------------------------------
 42* DTYPE_A (global) DESCA( DTYPE_ ) The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
45* indicating the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to
54* distribute the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to
56* distribute the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the
58* first row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCp(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCp( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCq( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes
72* of its process row.
73* The values of LOCp() and LOCq() may be determined via a call to
74* the ScaLAPACK tool function, NUMROC:
75* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
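*
* As an illustration (not part of the original source), a caller can
* obtain its local sizes with NUMROC once the grid is known; LOCROWS
* and LOCCOLS are hypothetical names, and CTXT_, M_, MB_, RSRC_, etc.
* are the descriptor-entry indices defined in the routine below:
*
* CALL BLACS_GRIDINFO( DESCA( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
* LOCROWS = NUMROC( DESCA( M_ ), DESCA( MB_ ), MYROW,
* $ DESCA( RSRC_ ), NPROW )
* LOCCOLS = NUMROC( DESCA( N_ ), DESCA( NB_ ), MYCOL,
* $ DESCA( CSRC_ ), NPCOL )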
80*
81* Arguments
82* =========
83*
84* UPLO (global input) CHARACTER
85* Specifies whether the upper or lower triangular part of the
86* Hermitian matrix sub( A ) is stored:
87* = 'U': Upper triangular
88* = 'L': Lower triangular
89*
90* N (global input) INTEGER
91* The number of rows and columns to be operated on, i.e. the
92* order of the distributed submatrix sub( A ). N >= 0.
93*
94* A (local input/local output) COMPLEX pointer into the
95* local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
96* On entry, this array contains the local pieces of the
97* Hermitian distributed matrix sub( A ). If UPLO = 'U', the
98* leading N-by-N upper triangular part of sub( A ) contains
99* the upper triangular part of the matrix, and its strictly
100* lower triangular part is not referenced. If UPLO = 'L', the
101* leading N-by-N lower triangular part of sub( A ) contains the
102* lower triangular part of the matrix, and its strictly upper
103* triangular part is not referenced. On exit, if UPLO = 'U',
104* the diagonal and first superdiagonal of sub( A ) are over-
105* written by the corresponding elements of the tridiagonal
106* matrix T, and the elements above the first superdiagonal,
107* with the array TAU, represent the unitary matrix Q as a
108* product of elementary reflectors; if UPLO = 'L', the diagonal
109* and first subdiagonal of sub( A ) are overwritten by the
110* corresponding elements of the tridiagonal matrix T, and the
111* elements below the first subdiagonal, with the array TAU,
112* represent the unitary matrix Q as a product of elementary
113* reflectors. See Further Details.
114*
115* IA (global input) INTEGER
116* The row index in the global array A indicating the first
117* row of sub( A ).
118*
119* JA (global input) INTEGER
120* The column index in the global array A indicating the
121* first column of sub( A ).
122*
123* DESCA (global and local input) INTEGER array of dimension DLEN_.
124* The array descriptor for the distributed matrix A.
125*
 126* D (local output) REAL array, dimension LOCq(JA+N-1)
127* The diagonal elements of the tridiagonal matrix T:
128* D(i) = A(i,i). D is tied to the distributed matrix A.
129*
 130* E (local output) REAL array, dimension LOCq(JA+N-1)
131* if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
132* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
133* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
134* distributed matrix A.
135*
 136* TAU (local output) COMPLEX array, dimension
137* LOCq(JA+N-1). This array contains the scalar factors TAU of
138* the elementary reflectors. TAU is tied to the distributed
139* matrix A.
140*
141* WORK (local workspace) COMPLEX array, dimension (LWORK)
 142* On exit, WORK( 1 ) returns the minimal and optimal LWORK.
143*
144* LWORK (local input) INTEGER
145* The dimension of the array WORK.
146* LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
147* Where:
148* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
149* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PCHETTRD', 'L', 0, 0,
150* 0, 0 )
151*
152* NUMROC is a ScaLAPACK tool function;
 153* PJLAENV is a ScaLAPACK environmental inquiry function
154* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
155* the subroutine BLACS_GRIDINFO.
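*
* A sketch (not part of the original source) of the workspace
* arithmetic, using the same calls the routine itself makes below;
* LWMIN is a hypothetical local variable:
*
* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
* LWMIN = 2*( ANB+1 )*( 4*NPS+2 ) + NPS
*
* On exit, WORK( 1 ) also returns this value.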
156*
157* INFO (global output) INTEGER
158* = 0: successful exit
 159* < 0: If the i-th argument is an array and the j-th entry had
 160* an illegal value, then INFO = -(i*100+j); if the i-th
161* argument is a scalar and had an illegal value, then
162* INFO = -i.
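*
* For example (illustrative, not in the original source): descriptor
* errors in this routine are reported against argument 6 (DESCA), so
* INFO = -602 indicates an invalid DESCA( CTXT_ ) (entry 2), and
* INFO = -606 indicates that DESCA( NB_ ) (entry 6) was not 1.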
163*
164* Further Details
165* ===============
166*
167* If UPLO = 'U', the matrix Q is represented as a product of
168* elementary reflectors
169*
170* Q = H(n-1) . . . H(2) H(1).
171*
172* Each H(i) has the form
173*
174* H(i) = I - tau * v * v'
175*
176* where tau is a complex scalar, and v is a complex vector with
177* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
178* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
179*
180* If UPLO = 'L', the matrix Q is represented as a product of
181* elementary reflectors
182*
183* Q = H(1) H(2) . . . H(n-1).
184*
185* Each H(i) has the form
186*
187* H(i) = I - tau * v * v'
188*
189* where tau is a complex scalar, and v is a complex vector with
190* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
191* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
192*
193* The contents of sub( A ) on exit are illustrated by the following
194* examples with n = 5:
195*
196* if UPLO = 'U': if UPLO = 'L':
197*
198* ( d e v2 v3 v4 ) ( d )
199* ( d e v3 v4 ) ( e d )
200* ( d e v4 ) ( v1 e d )
201* ( d e ) ( v1 v2 e d )
202* ( d ) ( v1 v2 v3 e d )
203*
204* where d and e denote diagonal and off-diagonal elements of T, and
205* vi denotes an element of the vector defining H(i).
206*
207* Data storage requirements
208* =========================
209*
210* PCHETTRD is not intended to be called directly. All users are
 211* encouraged to call PCHETRD, which will then call PCHETTRD if
212* appropriate. A must be in cyclic format (i.e. MB = NB = 1),
213* the process grid must be square ( i.e. NPROW = NPCOL ) and
214* only lower triangular storage is supported.
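*
* A minimal calling sketch (not part of the original source): the
* driver PCHETRD has the same argument list and calls PCHETTRD
* itself only when the conditions above are met, e.g.
*
* CALL PCHETRD( 'L', N, A, IA, JA, DESCA, D, E, TAU, WORK,
* $ LWORK, INFO )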
215*
216* Local variables
217* ===============
218*
219* PCHETTRD uses five local arrays:
220* WORK ( InV ) dimension ( NP, ANB+1): array V
221* WORK ( InH ) dimension ( NP, ANB+1): array H
222* WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
223* WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
224* WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
225*
226* Arrays V and H are replicated across all processor columns.
227* Arrays V^T and H^T are replicated across all processor rows.
228*
229* WORK ( InVT ), or V^T, is stored as a tall skinny
230* array ( NQ x ANB-1 ) for efficiency. Since only the lower
231* triangular portion of A is updated, Av is computed as:
232* tril(A) * v + v^T * tril(A,-1). This is performed as
233* two local triangular matrix-vector multiplications (both in
234* MVR2) followed by a transpose and a sum across the columns.
235* In the local computation, WORK( InVT ) is used to compute
236* tril(A) * v and WORK( InV ) is used to compute
237* v^T * tril(A,-1)
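*
* Written out (clarifying note, not in the original source): with
* only the lower triangle stored, sub( A ) = tril(A) + tril(A,-1)',
* so
* A * v = tril(A) * v + tril(A,-1)' * v
* = tril(A) * v + ( v' * tril(A,-1) )'
* which is why the second term can be built as a row-times-triangle
* product and then transposed and summed across the columns.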
238*
239* The following variables are global indices into A:
240* INDEX: The current global row and column number.
241* MAXINDEX: The global row and column for the first row and
242* column in the trailing block of A.
 243* LIIB, LIJB: The local row, column of the trailing block of A.
244*
245* The following variables point into the arrays A, V, H, V^T, H^T:
 246* BINDEX = INDEX-MININDEX: The column index in V, H, V^T, H^T.
247* LII: local index I: The local row number for row INDEX
248* LIJ: local index J: The local column number for column INDEX
249* LIIP1: local index I+1: The local row number for row INDEX+1
250* LIJP1: local index J+1: The local col number for col INDEX+1
251* LTLI: lower triangular local index I: The local row for the
252* upper left entry in tril( A(INDEX, INDEX) )
253* LTLIP1: lower triangular local index I+1: The local row for the
254* upper left entry in tril( A(INDEX+1, INDEX+1) )
255*
256* Details: The distinction between LII and LTLI (and between
257* LIIP1 and LTLIP1) is subtle. Within the current processor
258* column (i.e. MYCOL .eq. CURCOL) they are the same. However,
259* on some processors, A( LII, LIJ ) points to an element
260* above the diagonal, on these processors, LTLI = LII+1.
261*
262* The following variables give the number of rows and/or columns
263* in various matrices:
264* NP: The number of local rows in A( 1:N, 1:N )
265* NQ: The number of local columns in A( 1:N, 1:N )
266* NPM0: The number of local rows in A( INDEX:N, INDEX:N )
267* NQM0: The number of local columns in A( INDEX:N, INDEX:N )
268* NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
269* NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
270* LTNM0: The number of local rows & columns in
271* tril( A( INDEX:N, INDEX:N ) )
272* LTNM1: The number of local rows & columns in
273* tril( A( INDEX+1:N, INDEX+1:N ) )
274* NOTE: LTNM0 == LTNM1 on all processors except the diagonal
275* processors, i.e. those where MYCOL == MYROW.
276*
277* Invariants:
278* NP = NPM0 + LII - 1
279* NQ = NQM0 + LIJ - 1
280* NP = NPM1 + LIIP1 - 1
281* NQ = NQM1 + LIJP1 - 1
282* NP = LTLI + LTNM0 - 1
283* NP = LTLIP1 + LTNM1 - 1
284*
285* Temporary variables. The following variables are used within
 286* a few lines after they are set and do not hold state from one loop
287* iteration to the next:
288*
289* The matrix A:
290* The matrix A does not hold the same values that it would
 291* in an unblocked code nor the values that it would hold
292* in a blocked code.
293*
294* The value of A is confusing. It is easiest to state the
295* difference between trueA and A at the point that MVR2 is called,
296* so we will start there.
297*
298* Let trueA be the value that A would
299* have at a given point in an unblocked code and A
300* be the value that A has in this code at the same point.
301*
302* At the time of the call to MVR2,
303* trueA = A + V' * H + H' * V
304* where H = H( MAXINDEX:N, 1:BINDEX ) and
305* V = V( MAXINDEX:N, 1:BINDEX ).
306*
307* At the bottom of the inner loop,
308* trueA = A + V' * H + H' * V + v' * h + h' * v
309* where H = H( MAXINDEX:N, 1:BINDEX ) and
310* V = V( MAXINDEX:N, 1:BINDEX ) and
311* v = V( liip1:N, BINDEX+1 ) and
312* h = H( liip1:N, BINDEX+1 )
313*
314* At the top of the loop, BINDEX gets incremented, hence:
315* trueA = A + V' * H + H' * V + v' * h + h' * v
316* where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
317* V = V( MAXINDEX:N, 1:BINDEX-1 ) and
318* v = V( liip1:N, BINDEX ) and
319* h = H( liip1:N, BINDEX )
320*
321*
322* A gets updated at the bottom of the outer loop
323* After this update, trueA = A + v' * h + h' * v
324* where v = V( liip1:N, BINDEX ) and
325* h = H( liip1:N, BINDEX ) and BINDEX = 0
326* Indeed, the previous loop invariant as stated above for the
327* top of the loop still holds, but with BINDEX = 0, H and V
328* are null matrices.
329*
330* After the current column of A is updated,
331* trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
332* the rest of A is untouched.
333*
334* After the current block column of A is updated,
335* trueA = A + V' * H + H' * V
336* where H = H( MAXINDEX:N, 1:BINDEX ) and
337* V = V( MAXINDEX:N, 1:BINDEX )
338*
339* This brings us back to the point at which mvr2 is called.
340*
341*
342* Details of the parallelization:
343*
344* We delay spreading v across to all processor columns (which
345* would naturally happen at the bottom of the loop) in order to
346* combine the spread of v( : , i-1 ) with the spread of h( : , i )
347*
348* In order to compute h( :, i ), we must update A( :, i )
349* which means that the processor column owning A( :, i ) must
350* have: c, tau, v( i, i ) and h( i, i ).
351*
352* The traditional
353* way of computing v (and the one used in pzlatrd.f and
354* zlatrd.f) is:
355* v = tau * v
356* c = v' * h
357* alpha = - tau * c / 2
358* v = v + alpha * h
359* However, the traditional way of computing v requires that tau
360* be broadcast to all processors in the current column (to compute
361* v = tau * v) and then a sum-to-all is required (to
362* compute v' * h ). We use the following formula instead:
363* c = v' * h
364* v = tau * ( v - c * tau' * h / 2 )
365* The above formula allows tau to be spread down in the
366* same call to SGSUM2D which performs the sum-to-all of c.
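*
* A quick check (not in the original source) that the two schemes
* agree: writing v0 for the unscaled Householder vector, the
* traditional sequence computes
* v = tau * v0 + alpha * h, alpha = - tau * ( tau * v0 )' * h / 2
* = tau * ( v0 - ( v0' * h ) * tau' * h / 2 )
* which is exactly v = tau * ( v - c * tau' * h / 2 ) with
* c = v' * h evaluated at the unscaled v0.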
367*
368* The computation of v, which could be performed in any processor
 369* column (or other processor subsets), is performed in the
370* processor column that owns A( :, i+1 ) so that A( :, i+1 )
371* can be updated prior to spreading v across.
372*
373* We keep the block column of A up-to-date to minimize the
374* work required in updating the current column of A. Updating
375* the block column of A is reasonably load balanced whereas
376* updating the current column of A is not (only the current
377* processor column is involved).
378*
379* In the following overview of the steps performed, M in the
380* margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
381* or more flops per processor.
382*
383* Inner loop:
384* A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
385*M h = house( A(index:n, index) )
386*M Spread v, h across
387*M vt = v^T; ht = h^T
388* A( index+1:n, index+1:maxindex ) -=
389* ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
390*C v = tril(A) * h; vt = ht * tril(A,-1)
391*MorC v = v - H*V*h - V*H*h
392*M v = v + vt^T
393*M c = v' * h
394* v = tau * ( v - c * tau' * h / 2 )
395*C A = A - H*V - V*H
396*
397*
398*
399* =================================================================
400*
401* .. Parameters ..
402 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403 $ mb_, nb_, rsrc_, csrc_, lld_
404 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407 REAL ONE
408 parameter( one = 1.0e0 )
409 COMPLEX Z_ONE, Z_NEGONE, Z_ZERO
410 parameter( z_one = 1.0e0, z_negone = -1.0e0,
411 $ z_zero = 0.0e0 )
412 REAL ZERO
413 parameter( zero = 0.0e+0 )
414* ..
415*
416*
417* .. Local Scalars ..
418*
419*
420 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422 $ indexa, indexinh, indexinv, inh, inhb, inht,
423 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
424 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
425 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
426 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
427 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
428 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
429 $ pbmin, pbsize, pnb, rowsperproc
430 REAL NORM, SAFMAX, SAFMIN
431 COMPLEX ALPHA, BETA, C, ONEOVERBETA, TOPH, TOPNV,
432 $ toptau, topv, ttoph, ttopv
433* ..
434* .. Local Arrays ..
435*
436*
437*
438*
439 INTEGER IDUM1( 1 ), IDUM2( 1 )
440 REAL DTMP( 5 )
441 COMPLEX CC( 3 )
442* ..
443* .. External Subroutines ..
444 EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, cgemm, cgemv,
445 $ cgerv2d, cgesd2d, cgsum2d, chk1mat, clamov,
446 $ cscal, ctrmvt, pchk1mat, pstreecomb, pxerbla,
447 $ scombnrm2, sgebr2d, sgebs2d, sgsum2d
448* ..
449* .. External Functions ..
450*
451 LOGICAL LSAME
452 INTEGER ICEIL, NUMROC, PJLAENV
453 REAL PSLAMCH, SCNRM2
454 EXTERNAL lsame, iceil, numroc, pjlaenv, pslamch, scnrm2
455* ..
456* .. Intrinsic Functions ..
457 INTRINSIC aimag, cmplx, conjg, ichar, max, min, mod,
458 $ real, sign, sqrt
459* ..
460*
461*
462* .. Executable Statements ..
463* This is just to keep ftnchek and toolpack/1 happy
464 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
465 $ rsrc_.LT.0 )RETURN
466*
467*
468*
469* Further details
470* ===============
471*
472* At the top of the loop, v and nh have been computed but not
473* spread across. Hence, A is out-of-date even after the
474* rank 2k update. Furthermore, we compute the next v before
475* nh is spread across.
476*
477* I claim that if we used a sum-to-all on NV, by summing CC within
 478* each column, we could compute NV locally and could avoid
479* spreading V across. Bruce claims that sum-to-all can be made
480* to cost no more than sum-to-one on the Paragon. If that is
481* true, this would be a win. But,
482* the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
483* and hence the present scheme is better for now.
484*
485* Get grid parameters
486*
487 ictxt = desca( ctxt_ )
488 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
489*
490 safmax = sqrt( pslamch( ictxt, 'O' ) ) / n
491 safmin = sqrt( pslamch( ictxt, 'S' ) )
492*
493* Test the input parameters
494*
495 info = 0
496 IF( nprow.EQ.-1 ) THEN
497 info = -( 600+ctxt_ )
498 ELSE
499*
500* Here we set execution options for PCHETTRD
501*
502 pnb = pjlaenv( ictxt, 2, 'PCHETTRD', 'L', 0, 0, 0, 0 )
503 anb = pjlaenv( ictxt, 3, 'PCHETTRD', 'L', 0, 0, 0, 0 )
504*
505 interleave = ( pjlaenv( ictxt, 4, 'PCHETTRD', 'L', 1, 0, 0,
506 $ 0 ).EQ.1 )
507 twogemms = ( pjlaenv( ictxt, 4, 'PCHETTRD', 'L', 2, 0, 0,
508 $ 0 ).EQ.1 )
509 balanced = ( pjlaenv( ictxt, 4, 'PCHETTRD', 'L', 3, 0, 0,
510 $ 0 ).EQ.1 )
511*
512 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
513*
514*
515 upper = lsame( uplo, 'U' )
516 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
517 $ info = 600 + nb_
518 IF( info.EQ.0 ) THEN
519*
520*
521* Here is the arithmetic:
522* Let maxnpq = max( np, nq, 2 * ANB )
523* LDV = 4 * max( np, nq ) + 2
524* LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
525* = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
526*
527* This overestimates memory requirements when ANB > NP/2
528* Memory requirements are lower when interleave = .false.
529* Hence, we could have two sets of memory requirements,
 530* one for interleave and one for non-interleave.
531*
532*
533 nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
534 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
535*
536 work( 1 ) = cmplx( lwmin )
537 IF( .NOT.lsame( uplo, 'L' ) ) THEN
538 info = -1
539 ELSE IF( ia.NE.1 ) THEN
540 info = -4
541 ELSE IF( ja.NE.1 ) THEN
542 info = -5
543 ELSE IF( nprow.NE.npcol ) THEN
544 info = -( 600+ctxt_ )
545 ELSE IF( desca( dtype_ ).NE.1 ) THEN
546 info = -( 600+dtype_ )
547 ELSE IF( desca( mb_ ).NE.1 ) THEN
548 info = -( 600+mb_ )
549 ELSE IF( desca( nb_ ).NE.1 ) THEN
550 info = -( 600+nb_ )
551 ELSE IF( desca( rsrc_ ).NE.0 ) THEN
552 info = -( 600+rsrc_ )
553 ELSE IF( desca( csrc_ ).NE.0 ) THEN
554 info = -( 600+csrc_ )
555 ELSE IF( lwork.LT.lwmin ) THEN
556 info = -11
557 END IF
558 END IF
559 IF( upper ) THEN
560 idum1( 1 ) = ichar( 'U' )
561 ELSE
562 idum1( 1 ) = ichar( 'L' )
563 END IF
564 idum2( 1 ) = 1
565*
566 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
567 $ info )
568 END IF
569*
570 IF( info.NE.0 ) THEN
571 CALL pxerbla( ictxt, 'PCHETTRD', -info )
572 RETURN
573 END IF
574*
575* Quick return if possible
576*
577 IF( n.EQ.0 )
578 $ RETURN
579*
580*
581*
582* Reduce the lower triangle of sub( A )
583 np = numroc( n, 1, myrow, 0, nprow )
584 nq = numroc( n, 1, mycol, 0, npcol )
585*
586 nxtrow = 0
587 nxtcol = 0
588*
589 liip1 = 1
590 lijp1 = 1
591 npm1 = np
592 nqm1 = nq
593*
594 lda = desca( lld_ )
595 ictxt = desca( ctxt_ )
596*
597*
598*
599* Miscellaneous details:
600* Put tau, D and E in the right places
601* Check signs
602* Place all the arrays in WORK, control their placement
603* in memory.
604*
605*
606*
607* Loop invariants
608* A(LIIP1, LIJ) points to the first element of A(I+1,J)
609* NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
610* A(LII:N,LIJ:N) is one step out of date.
611* proc( CURROW, CURCOL ) owns A(LII,LIJ)
612* proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
613*
614 inh = 1
615*
616 IF( interleave ) THEN
617*
618* H and V are interleaved to minimize memory movement
 619* LDV has to be twice as large to accommodate interleaving.
620* In addition, LDV is doubled again to allow v, h and
 621* toptau to be spread across and transposed in a
622* single communication operation with minimum memory
623* movement.
624*
625* We could reduce LDV back to 2*MAX(NPM1,NQM1)
626* by increasing the memory movement required in
627* the spread and transpose of v, h and toptau.
628* However, since the non-interleaved path already
 629* provides a near-minimum memory requirement option,
630* we did not provide this additional path.
631*
632 ldv = 4*( max( npm1, nqm1 ) ) + 2
633*
634 inh = 1
635*
636 inv = inh + ldv / 2
637 invt = inh + ( anb+1 )*ldv
638*
639 inht = invt + ldv / 2
640 intmp = invt + ldv*( anb+1 )
641*
642 ELSE
643 ldv = max( npm1, nqm1 )
644*
645 inht = inh + ldv*( anb+1 )
646 inv = inht + ldv*( anb+1 )
647*
648* The code works without this +1, but only because of a
649* coincidence. Without the +1, WORK(INVT) gets trashed, but
650* WORK(INVT) is only used once and when it is used, it is
651* multiplied by WORK( INH ) which is zero. Hence, the fact
652* that WORK(INVT) is trashed has no effect.
653*
654 invt = inv + ldv*( anb+1 ) + 1
655 intmp = invt + ldv*( 2*anb )
656*
657 END IF
658*
659 IF( info.NE.0 ) THEN
660 CALL pxerbla( ictxt, 'PCHETTRD', -info )
661 work( 1 ) = cmplx( lwmin )
662 RETURN
663 END IF
664*
665*
 666* This satisfies the loop invariant: trueA = A - V * HT - H * VT,
667* (where V, H, VT and HT all have BINDEX+1 rows/columns)
668* the first ANB times through the loop.
669*
670*
671*
672* Setting either ( InH and InHT ) or InV to Z_ZERO
673* is adequate except in the face of NaNs.
674*
675*
676 DO 10 i = 1, np
677 work( inh+i-1 ) = z_zero
678 work( inv+i-1 ) = z_zero
679 10 CONTINUE
680 DO 20 i = 1, nq
681 work( inht+i-1 ) = z_zero
682 20 CONTINUE
683*
684*
685*
686 topnv = z_zero
687*
688 ltlip1 = lijp1
689 ltnm1 = npm1
690 IF( mycol.GT.myrow ) THEN
691 ltlip1 = ltlip1 + 1
692 ltnm1 = ltnm1 - 1
693 END IF
694*
695*
696 DO 210 minindex = 1, n - 1, anb
697*
698*
699 maxindex = min( minindex+anb-1, n )
700 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
701 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
702*
703 nqb = nq - lijb + 1
704 npb = np - liib + 1
705 inhtb = inht + lijb - 1
706 invtb = invt + lijb - 1
707 inhb = inh + liib - 1
708 invb = inv + liib - 1
709*
710*
711*
712*
713 DO 160 index = minindex, min( maxindex, n-1 )
714*
715 bindex = index - minindex
716*
717 currow = nxtrow
718 curcol = nxtcol
719*
720 nxtrow = mod( currow+1, nprow )
721 nxtcol = mod( curcol+1, npcol )
722*
723 lii = liip1
724 lij = lijp1
725 npm0 = npm1
726*
727 IF( myrow.EQ.currow ) THEN
728 npm1 = npm1 - 1
729 liip1 = liip1 + 1
730 END IF
731 IF( mycol.EQ.curcol ) THEN
732 nqm1 = nqm1 - 1
733 lijp1 = lijp1 + 1
734 ltlip1 = ltlip1 + 1
735 ltnm1 = ltnm1 - 1
736 END IF
737*
738*
739*
740*
741* V = NV, VT = NVT, H = NH, HT = NHT
742*
743*
744* Update the current column of A
745*
746*
747 IF( mycol.EQ.curcol ) THEN
748*
749 indexa = lii + ( lij-1 )*lda
750 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
751 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
752 ttoph = conjg( work( inht+lij-1+bindex*ldv ) )
753 ttopv = conjg( topnv )
754*
755 IF( index.GT.1 ) THEN
756 DO 30 i = 0, npm0 - 1
757* A( INDEXA+I ) = A( INDEXA+I )
758 a( indexa+i ) = a( indexa+i ) -
759 $ work( indexinv+ldv+i )*ttoph -
760 $ work( indexinh+ldv+i )*ttopv
761 30 CONTINUE
762 END IF
763*
764*
765 END IF
766*
767*
768 IF( mycol.EQ.curcol ) THEN
769*
770* Compute the householder vector
771*
772 IF( myrow.EQ.currow ) THEN
773 dtmp( 2 ) = real( a( lii+( lij-1 )*lda ) )
774 ELSE
775 dtmp( 2 ) = zero
776 END IF
777 IF( myrow.EQ.nxtrow ) THEN
778 dtmp( 3 ) = real( a( liip1+( lij-1 )*lda ) )
779 dtmp( 4 ) = aimag( a( liip1+( lij-1 )*lda ) )
780 ELSE
781 dtmp( 3 ) = zero
782 dtmp( 4 ) = zero
783 END IF
784*
785 norm = scnrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
786 dtmp( 1 ) = norm
787*
 788* IF DTMP(5) = 1.0, NORM is too large or too small and might
 789* cause over- or underflow, hence PSTREECOMB must be called.
 790* If DTMP(5) is zero on output, DTMP(1) can be trusted.
791*
792 dtmp( 5 ) = zero
793 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin ) THEN
794 dtmp( 5 ) = one
795 dtmp( 1 ) = zero
796 END IF
797*
798 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
799 CALL sgsum2d( ictxt, 'C', ' ', 5, 1, dtmp, 5, -1,
800 $ curcol )
801 IF( dtmp( 5 ).EQ.zero ) THEN
802 dtmp( 1 ) = sqrt( dtmp( 1 ) )
803 ELSE
804 dtmp( 1 ) = norm
805 CALL pstreecomb( ictxt, 'C', 1, dtmp, -1, mycol,
806 $ scombnrm2 )
807 END IF
808*
809 norm = dtmp( 1 )
810*
811 d( lij ) = dtmp( 2 )
812 IF( myrow.EQ.currow .AND. mycol.EQ.curcol ) THEN
813 a( lii+( lij-1 )*lda ) = cmplx( d( lij ), zero )
814 END IF
815*
816*
817 alpha = cmplx( dtmp( 3 ), dtmp( 4 ) )
818*
819 norm = sign( norm, real( alpha ) )
820*
821 IF( norm.EQ.zero ) THEN
822 toptau = zero
823 ELSE
824 beta = norm + alpha
825 toptau = beta / norm
826 oneoverbeta = 1.0e0 / beta
827*
828 CALL cscal( npm1, oneoverbeta,
829 $ a( liip1+( lij-1 )*lda ), 1 )
830 END IF
831*
832 IF( myrow.EQ.nxtrow ) THEN
833 a( liip1+( lij-1 )*lda ) = z_one
834 END IF
835*
836 tau( lij ) = toptau
837 e( lij ) = -norm
838*
839 END IF
840*
841*
842* Spread v, nh, toptau across
843*
844 DO 40 i = 0, npm1 - 1
845 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
846 $ ( lij-1 )*lda )
847 40 CONTINUE
848*
849 IF( mycol.EQ.curcol ) THEN
850 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
851 CALL cgebs2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
852 $ work( inv+liip1-1+bindex*ldv ),
853 $ npm1+npm1+1 )
854 ELSE
855 CALL cgebr2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
856 $ work( inv+liip1-1+bindex*ldv ),
857 $ npm1+npm1+1, myrow, curcol )
858 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
859 END IF
860 DO 50 i = 0, npm1 - 1
861 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
862 $ 1+bindex*ldv+npm1+i )
863 50 CONTINUE
864*
865 IF( index.LT.n ) THEN
866 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
867 $ a( liip1+( lij-1 )*lda ) = e( lij )
868 END IF
869*
870* Transpose v, nh
871*
872*
873 IF( myrow.EQ.mycol ) THEN
874 DO 60 i = 0, npm1 + npm1
875 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
876 $ bindex*ldv+i )
877 60 CONTINUE
878 ELSE
879 CALL cgesd2d( ictxt, npm1+npm1, 1,
880 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
881 $ mycol, myrow )
882 CALL cgerv2d( ictxt, nqm1+nqm1, 1,
883 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
884 $ mycol, myrow )
885 END IF
886*
887 DO 70 i = 0, nqm1 - 1
888 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
889 $ lijp1-1+bindex*ldv+nqm1+i )
890 70 CONTINUE
891*
892*
893* Update the current block column of A
894*
895 IF( index.GT.1 ) THEN
896 DO 90 j = lijp1, lijb - 1
897 DO 80 i = 0, npm1 - 1
898*
899 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
900 $ - work( inv+liip1-1+bindex*ldv+i )*
901 $ conjg( work( inht+j-1+bindex*ldv ) ) -
902 $ work( inh+liip1-1+bindex*ldv+i )*
903 $ conjg( work( invt+j-1+bindex*ldv ) )
904 80 CONTINUE
905 90 CONTINUE
906 END IF
907*
908*
909*
910* Compute NV = A * NHT; NVT = A * NH
911*
912* These two lines are necessary because these elements
913* are not always involved in the calls to CTRMVT
914* for two reasons:
 915* 1) On diagonal processors, the call to CTRMVT
916* involves only LTNM1-1 elements
 917* 2) On some processes, NQM1 < LTNM1 or LIIP1 < LTLIP1
918* and when the results are combined across all processes,
919* uninitialized values may be included.
920 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
921 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
922*
923*
924 IF( myrow.EQ.mycol ) THEN
925 IF( ltnm1.GT.1 ) THEN
926 CALL ctrmvt( 'L', ltnm1-1,
927 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
928 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
929 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
930 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
931 $ ldv ), 1, work( inht+lijp1-1+( bindex+
932 $ 1 )*ldv ), 1 )
933 END IF
934 DO 100 i = 1, ltnm1
935 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
936 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
937 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
938 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
939 100 CONTINUE
940 ELSE
941 IF( ltnm1.GT.0 )
942 $ CALL ctrmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
943 $ lda, work( invt+lijp1-1+( bindex+1 )*
944 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
945 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
946 $ ( bindex+1 )*ldv ), 1,
947 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
948 $ 1 )
949*
950 END IF
951*
952*
953* We take advantage of the fact that:
954* A * sum( B ) = sum ( A * B ) for matrices A,B
955*
956* trueA = A + V * HT + H * VT
 957* hence: (trueA) * v = A * v + V * HT * v + H * VT * v
958* VT * v = sum_p_in_NPROW ( VTp * v )
959* H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
960*
961* v = v + V * HT * h + H * VT * h
962*
963*
964*
965* tmp = HT * nh1
966 DO 110 i = 1, 2*( bindex+1 )
967 work( intmp-1+i ) = 0
968 110 CONTINUE
969*
970 IF( balanced ) THEN
971 npset = nprow
972 mysetnum = myrow
973 rowsperproc = iceil( nqb, npset )
974 myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
975 numrows = min( rowsperproc, nqb-myfirstrow+1 )
976*
977*
978* tmp = HT * v
979*
980 CALL cgemv( 'C', numrows, bindex+1, z_one,
981 $ work( inhtb+myfirstrow-1 ), ldv,
982 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
983 $ 1, z_zero, work( intmp ), 1 )
984* tmp2 = VT * v
985 CALL cgemv( 'C', numrows, bindex+1, z_one,
986 $ work( invtb+myfirstrow-1 ), ldv,
987 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
988 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
989*
990*
991 CALL cgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
992 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
993 ELSE
994* tmp = HT * v
995*
996 CALL cgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
997 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
998 $ z_zero, work( intmp ), 1 )
999* tmp2 = VT * v
1000 CALL cgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
1001 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
1002 $ z_zero, work( intmp+bindex+1 ), 1 )
1003*
1004 END IF
1005*
1006*
1007*
1008 IF( balanced ) THEN
1009 mysetnum = mycol
1010*
1011 rowsperproc = iceil( npb, npset )
1012 myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1013 numrows = min( rowsperproc, npb-myfirstrow+1 )
1014*
1015 CALL cgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1016 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1017*
1018*
1019* v = v + V * tmp
 1020 IF( index.GT.1 ) THEN
1021 CALL cgemv( 'N', numrows, bindex+1, z_negone,
1022 $ work( invb+myfirstrow-1 ), ldv,
1023 $ work( intmp ), 1, z_one,
1024 $ work( invb+myfirstrow-1+( bindex+1 )*
1025 $ ldv ), 1 )
1026*
1027* v = v + H * tmp2
1028 CALL cgemv( 'N', numrows, bindex+1, z_negone,
1029 $ work( inhb+myfirstrow-1 ), ldv,
1030 $ work( intmp+bindex+1 ), 1, z_one,
1031 $ work( invb+myfirstrow-1+( bindex+1 )*
1032 $ ldv ), 1 )
1033 END IF
1034*
1035 ELSE
1036* v = v + V * tmp
1037 CALL cgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1038 $ ldv, work( intmp ), 1, z_one,
1039 $ work( invb+( bindex+1 )*ldv ), 1 )
1040*
1041*
1042* v = v + H * tmp2
1043 CALL cgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1044 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1045 $ work( invb+( bindex+1 )*ldv ), 1 )
1046*
1047 END IF
1048*
1049*
1050* Transpose NV and add it back into NVT
1051*
1052 IF( myrow.EQ.mycol ) THEN
1053 DO 120 i = 0, nqm1 - 1
1054 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1055 $ i )
1056 120 CONTINUE
1057 ELSE
1058 CALL cgesd2d( ictxt, nqm1, 1,
1059 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1060 $ nqm1, mycol, myrow )
1061 CALL cgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1062 $ myrow )
1063*
1064 END IF
1065 DO 130 i = 0, npm1 - 1
1066 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1067 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1068 130 CONTINUE
1069*
1070* Sum-to-one NV rowwise (within a row)
1071*
1072 CALL cgsum2d( ictxt, 'R', ' ', npm1, 1,
1073 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1074 $ myrow, nxtcol )
1075*
1076*
1077* Dot product c = NV * NH
1078* Sum-to-all c within next processor column
1079*
1080*
1081 IF( mycol.EQ.nxtcol ) THEN
1082 cc( 1 ) = z_zero
1083 DO 140 i = 0, npm1 - 1
1084 cc( 1 ) = cc( 1 ) + conjg( work( inv+liip1-1+( bindex+
1085 $ 1 )*ldv+i ) )*work( inh+liip1-1+
1086 $ ( bindex+1 )*ldv+i )
1087 140 CONTINUE
1088 IF( myrow.EQ.nxtrow ) THEN
1089 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1090 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1091 ELSE
1092 cc( 2 ) = z_zero
1093 cc( 3 ) = z_zero
1094 END IF
1095 CALL cgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1096*
1097 topv = cc( 2 )
1098 c = cc( 1 )
1099 toph = cc( 3 )
1100*
1101 topnv = toptau*( topv-c*conjg( toptau ) / 2*toph )
1102*
1103*
1104* Compute V = Tau * (V - C * Tau' / 2 * H )
1105*
1106*
1107 DO 150 i = 0, npm1 - 1
1108 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1109 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*
1110 $ conjg( toptau ) / 2*work( inh+liip1-1+( bindex+1 )*
1111 $ ldv+i ) )
1112 150 CONTINUE
1113*
1114 END IF
1115*
1116*
1117 160 CONTINUE
1118*
1119*
1120* Perform the rank2k update
1121*
1122 IF( maxindex.LT.n ) THEN
1123*
1124 DO 170 i = 0, npm1 - 1
1125 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1126 170 CONTINUE
1127*
1128*
1129*
1130 IF( .NOT.twogemms ) THEN
1131 IF( interleave ) THEN
1132 ldzg = ldv / 2
1133 ELSE
1134 CALL clamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1135 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1136*
1137 CALL clamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1138 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1139 ldzg = ldv
1140 END IF
1141 nbzg = anb*2
1142 ELSE
1143 ldzg = ldv
1144 nbzg = anb
1145 END IF
1146*
1147*
1148 DO 180 pbmin = 1, ltnm1, pnb
1149*
1150 pbsize = min( pnb, ltnm1-pbmin+1 )
1151 pbmax = min( ltnm1, pbmin+pnb-1 )
1152 CALL cgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1153 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1154 $ work( invt+lijp1-1 ), ldzg, z_one,
1155 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1156 IF( twogemms ) THEN
1157 CALL cgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1158 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1159 $ work( inht+lijp1-1 ), ldzg, z_one,
1160 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1161 END IF
1162 180 CONTINUE
1163*
1164*
1165*
1166 DO 190 i = 0, npm1 - 1
1167 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1168 work( inh+liip1-1+i ) = work( intmp+i )
1169 190 CONTINUE
1170 DO 200 i = 0, nqm1 - 1
1171 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1172 200 CONTINUE
1173*
1174*
1175 END IF
1176*
1177* End of the update A code
1178*
1179 210 CONTINUE
1180*
1181 IF( mycol.EQ.nxtcol ) THEN
1182 IF( myrow.EQ.nxtrow ) THEN
1183*
1184 d( nq ) = real( a( np+( nq-1 )*lda ) )
1185 a( np+( nq-1 )*lda ) = d( nq )
1186*
1187 CALL sgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1188 ELSE
1189 CALL sgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1190 $ nxtcol )
1191 END IF
1192 END IF
1193*
1194*
1195*
1196*
1197 work( 1 ) = cmplx( lwmin )
1198 RETURN
1199*
1200* End of PCHETTRD
1201*
1202*
1203 END