SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzhettrd.f
Go to the documentation of this file.
1 SUBROUTINE pzhettrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2 $ LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 2.0.2) --
5* Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver
6* May 1 2012
7*
8* .. Scalar Arguments ..
9 CHARACTER UPLO
10 INTEGER IA, INFO, JA, LWORK, N
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 DOUBLE PRECISION D( * ), E( * )
15 COMPLEX*16 A( * ), TAU( * ), WORK( * )
16* ..
17*
18* Purpose
19*
20* =======
21*
22* PZHETTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
23* tridiagonal form T by an unitary similarity transformation:
24* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25*
26* Notes
27* =====
28*
29* Each global data object is described by an associated description
30* vector. This vector stores the information required to establish
31* the mapping between an object element and its corresponding
32* process and memory location.
33*
34* Let A be a generic term for any 2D block cyclicly distributed
35* array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- -----------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle,
45* indicating the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to
54* distribute the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to
56* distribute the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the
58* first row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCp(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCp( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCq( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes
72* of its process row.
73* The values of LOCp() and LOCq() may be determined via a call to
74* the ScaLAPACK tool function, NUMROC:
75* LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* UPLO (global input) CHARACTER
85* Specifies whether the upper or lower triangular part of the
86* Hermitian matrix sub( A ) is stored:
87* = 'U': Upper triangular
88* = 'L': Lower triangular
89*
90* N (global input) INTEGER
91* The number of rows and columns to be operated on, i.e. the
92* order of the distributed submatrix sub( A ). N >= 0.
93*
94* A (local input/local output) COMPLEX*16 pointer into the
95* local memory to an array of dimension (LLD_A,LOCq(JA+N-1)).
96* On entry, this array contains the local pieces of the
97* Hermitian distributed matrix sub( A ). If UPLO = 'U', the
98* leading N-by-N upper triangular part of sub( A ) contains
99* the upper triangular part of the matrix, and its strictly
100* lower triangular part is not referenced. If UPLO = 'L', the
101* leading N-by-N lower triangular part of sub( A ) contains the
102* lower triangular part of the matrix, and its strictly upper
103* triangular part is not referenced. On exit, if UPLO = 'U',
104* the diagonal and first superdiagonal of sub( A ) are over-
105* written by the corresponding elements of the tridiagonal
106* matrix T, and the elements above the first superdiagonal,
107* with the array TAU, represent the unitary matrix Q as a
108* product of elementary reflectors; if UPLO = 'L', the diagonal
109* and first subdiagonal of sub( A ) are overwritten by the
110* corresponding elements of the tridiagonal matrix T, and the
111* elements below the first subdiagonal, with the array TAU,
112* represent the unitary matrix Q as a product of elementary
113* reflectors. See Further Details.
114*
115* IA (global input) INTEGER
116* The row index in the global array A indicating the first
117* row of sub( A ).
118*
119* JA (global input) INTEGER
120* The column index in the global array A indicating the
121* first column of sub( A ).
122*
123* DESCA (global and local input) INTEGER array of dimension DLEN_.
124* The array descriptor for the distributed matrix A.
125*
126* D (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
127* The diagonal elements of the tridiagonal matrix T:
128* D(i) = A(i,i). D is tied to the distributed matrix A.
129*
130* E (local output) DOUBLE PRECISION array, dim LOCq(JA+N-1)
131* if UPLO = 'U', LOCq(JA+N-2) otherwise. The off-diagonal
132* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
133* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
134* distributed matrix A.
135*
136* TAU (local output) COMPLEX*16, array, dimension
137* LOCq(JA+N-1). This array contains the scalar factors TAU of
138* the elementary reflectors. TAU is tied to the distributed
139* matrix A.
140*
141* WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
142* On exit, WORK( 1 ) returns the minimal and optimal workspace
143*
144* LWORK (local input) INTEGER
145* The dimension of the array WORK.
146* LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + NPS
147* Where:
148* NPS = MAX( NUMROC( N, 1, 0, 0, NPROW ), 2*ANB )
149* ANB = PJLAENV( DESCA( CTXT_ ), 3, 'PZHETTRD', 'L', 0, 0,
150* 0, 0 )
151*
152* NUMROC is a ScaLAPACK tool function;
153* PJLAENV is a ScaLAPACK envionmental inquiry function
154* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
155* the subroutine BLACS_GRIDINFO.
156*
157* INFO (global output) INTEGER
158* = 0: successful exit
159* < 0: If the i-th argument is an array and the j-entry had
160* an illegal value, then INFO = -(i*100+j), if the i-th
161* argument is a scalar and had an illegal value, then
162* INFO = -i.
163*
164* Further Details
165* ===============
166*
167* If UPLO = 'U', the matrix Q is represented as a product of
168* elementary reflectors
169*
170* Q = H(n-1) . . . H(2) H(1).
171*
172* Each H(i) has the form
173*
174* H(i) = I - tau * v * v'
175*
176* where tau is a complex scalar, and v is a complex vector with
177* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
178* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
179*
180* If UPLO = 'L', the matrix Q is represented as a product of
181* elementary reflectors
182*
183* Q = H(1) H(2) . . . H(n-1).
184*
185* Each H(i) has the form
186*
187* H(i) = I - tau * v * v'
188*
189* where tau is a complex scalar, and v is a complex vector with
190* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
191* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
192*
193* The contents of sub( A ) on exit are illustrated by the following
194* examples with n = 5:
195*
196* if UPLO = 'U': if UPLO = 'L':
197*
198* ( d e v2 v3 v4 ) ( d )
199* ( d e v3 v4 ) ( e d )
200* ( d e v4 ) ( v1 e d )
201* ( d e ) ( v1 v2 e d )
202* ( d ) ( v1 v2 v3 e d )
203*
204* where d and e denote diagonal and off-diagonal elements of T, and
205* vi denotes an element of the vector defining H(i).
206*
207* Data storage requirements
208* =========================
209*
210* PZHETTRD is not intended to be called directly. All users are
211* encourage to call PZHETRD which will then call PZHETTRD if
212* appropriate. A must be in cyclic format (i.e. MB = NB = 1),
213* the process grid must be square ( i.e. NPROW = NPCOL ) and
214* only lower triangular storage is supported.
215*
216* Local variables
217* ===============
218*
219* PZHETTRD uses five local arrays:
220* WORK ( InV ) dimension ( NP, ANB+1): array V
221* WORK ( InH ) dimension ( NP, ANB+1): array H
222* WORK ( InVT ) dimension ( NQ, ANB+1): transpose of the array V
223* WORK ( InHT ) dimension ( NQ, ANB+1): transpose of the array H
224* WORK ( InVTT ) dimension ( NQ, 1): transpose of the array VT
225*
226* Arrays V and H are replicated across all processor columns.
227* Arrays V^T and H^T are replicated across all processor rows.
228*
229* WORK ( InVT ), or V^T, is stored as a tall skinny
230* array ( NQ x ANB-1 ) for efficiency. Since only the lower
231* triangular portion of A is updated, Av is computed as:
232* tril(A) * v + v^T * tril(A,-1). This is performed as
233* two local triangular matrix-vector multiplications (both in
234* MVR2) followed by a transpose and a sum across the columns.
235* In the local computation, WORK( InVT ) is used to compute
236* tril(A) * v and WORK( InV ) is used to compute
237* v^T * tril(A,-1)
238*
239* The following variables are global indices into A:
240* INDEX: The current global row and column number.
241* MAXINDEX: The global row and column for the first row and
242* column in the trailing block of A.
243* LIIB, LIJB: The first row, column in
244*
245* The following variables point into the arrays A, V, H, V^T, H^T:
246* BINDEX =INDEX-MININDEX: The column index in V, H, V^T, H^T.
247* LII: local index I: The local row number for row INDEX
248* LIJ: local index J: The local column number for column INDEX
249* LIIP1: local index I+1: The local row number for row INDEX+1
250* LIJP1: local index J+1: The local col number for col INDEX+1
251* LTLI: lower triangular local index I: The local row for the
252* upper left entry in tril( A(INDEX, INDEX) )
253* LTLIP1: lower triangular local index I+1: The local row for the
254* upper left entry in tril( A(INDEX+1, INDEX+1) )
255*
256* Details: The distinction between LII and LTLI (and between
257* LIIP1 and LTLIP1) is subtle. Within the current processor
258* column (i.e. MYCOL .eq. CURCOL) they are the same. However,
259* on some processors, A( LII, LIJ ) points to an element
260* above the diagonal, on these processors, LTLI = LII+1.
261*
262* The following variables give the number of rows and/or columns
263* in various matrices:
264* NP: The number of local rows in A( 1:N, 1:N )
265* NQ: The number of local columns in A( 1:N, 1:N )
266* NPM0: The number of local rows in A( INDEX:N, INDEX:N )
267* NQM0: The number of local columns in A( INDEX:N, INDEX:N )
268* NPM1: The number of local rows in A( INDEX+1:N, INDEX:N )
269* NQM1: The number of local columns in A( INDEX+1:N, INDEX:N )
270* LTNM0: The number of local rows & columns in
271* tril( A( INDEX:N, INDEX:N ) )
272* LTNM1: The number of local rows & columns in
273* tril( A( INDEX+1:N, INDEX+1:N ) )
274* NOTE: LTNM0 == LTNM1 on all processors except the diagonal
275* processors, i.e. those where MYCOL == MYROW.
276*
277* Invariants:
278* NP = NPM0 + LII - 1
279* NQ = NQM0 + LIJ - 1
280* NP = NPM1 + LIIP1 - 1
281* NQ = NQM1 + LIJP1 - 1
282* NP = LTLI + LTNM0 - 1
283* NP = LTLIP1 + LTNM1 - 1
284*
285* Temporary variables. The following variables are used within
286* a few lines after they are set and do hold state from one loop
287* iteration to the next:
288*
289* The matrix A:
290* The matrix A does not hold the same values that it would
291* in an unblocked code nor the values that it would hold in
292* in a blocked code.
293*
294* The value of A is confusing. It is easiest to state the
295* difference between trueA and A at the point that MVR2 is called,
296* so we will start there.
297*
298* Let trueA be the value that A would
299* have at a given point in an unblocked code and A
300* be the value that A has in this code at the same point.
301*
302* At the time of the call to MVR2,
303* trueA = A + V' * H + H' * V
304* where H = H( MAXINDEX:N, 1:BINDEX ) and
305* V = V( MAXINDEX:N, 1:BINDEX ).
306*
307* At the bottom of the inner loop,
308* trueA = A + V' * H + H' * V + v' * h + h' * v
309* where H = H( MAXINDEX:N, 1:BINDEX ) and
310* V = V( MAXINDEX:N, 1:BINDEX ) and
311* v = V( liip1:N, BINDEX+1 ) and
312* h = H( liip1:N, BINDEX+1 )
313*
314* At the top of the loop, BINDEX gets incremented, hence:
315* trueA = A + V' * H + H' * V + v' * h + h' * v
316* where H = H( MAXINDEX:N, 1:BINDEX-1 ) and
317* V = V( MAXINDEX:N, 1:BINDEX-1 ) and
318* v = V( liip1:N, BINDEX ) and
319* h = H( liip1:N, BINDEX )
320*
321*
322* A gets updated at the bottom of the outer loop
323* After this update, trueA = A + v' * h + h' * v
324* where v = V( liip1:N, BINDEX ) and
325* h = H( liip1:N, BINDEX ) and BINDEX = 0
326* Indeed, the previous loop invariant as stated above for the
327* top of the loop still holds, but with BINDEX = 0, H and V
328* are null matrices.
329*
330* After the current column of A is updated,
331* trueA( INDEX, INDEX:N ) = A( INDEX, INDEX:N )
332* the rest of A is untouched.
333*
334* After the current block column of A is updated,
335* trueA = A + V' * H + H' * V
336* where H = H( MAXINDEX:N, 1:BINDEX ) and
337* V = V( MAXINDEX:N, 1:BINDEX )
338*
339* This brings us back to the point at which mvr2 is called.
340*
341*
342* Details of the parallelization:
343*
344* We delay spreading v across to all processor columns (which
345* would naturally happen at the bottom of the loop) in order to
346* combine the spread of v( : , i-1 ) with the spread of h( : , i )
347*
348* In order to compute h( :, i ), we must update A( :, i )
349* which means that the processor column owning A( :, i ) must
350* have: c, tau, v( i, i ) and h( i, i ).
351*
352* The traditional
353* way of computing v (and the one used in pzlatrd.f and
354* zlatrd.f) is:
355* v = tau * v
356* c = v' * h
357* alpha = - tau * c / 2
358* v = v + alpha * h
359* However, the traditional way of computing v requires that tau
360* be broadcast to all processors in the current column (to compute
361* v = tau * v) and then a sum-to-all is required (to
362* compute v' * h ). We use the following formula instead:
363* c = v' * h
364* v = tau * ( v - c * tau' * h / 2 )
365* The above formula allows tau to be spread down in the
366* same call to DGSUM2D which performs the sum-to-all of c.
367*
368* The computation of v, which could be performed in any processor
369* column (or other procesor subsets), is performed in the
370* processor column that owns A( :, i+1 ) so that A( :, i+1 )
371* can be updated prior to spreading v across.
372*
373* We keep the block column of A up-to-date to minimize the
374* work required in updating the current column of A. Updating
375* the block column of A is reasonably load balanced whereas
376* updating the current column of A is not (only the current
377* processor column is involved).
378*
379* In the following overview of the steps performed, M in the
380* margin indicates message traffic and C indicates O(n^2 nb/sqrt(p))
381* or more flops per processor.
382*
383* Inner loop:
384* A( index:n, index ) -= ( v * ht(bindex) + h * vt( bindex) )
385*M h = house( A(index:n, index) )
386*M Spread v, h across
387*M vt = v^T; ht = h^T
388* A( index+1:n, index+1:maxindex ) -=
389* ( v * ht(index+1:maxindex) + h *vt(index+1:maxindex) )
390*C v = tril(A) * h; vt = ht * tril(A,-1)
391*MorC v = v - H*V*h - V*H*h
392*M v = v + vt^T
393*M c = v' * h
394* v = tau * ( v - c * tau' * h / 2 )
395*C A = A - H*V - V*H
396*
397*
398*
399* =================================================================
400*
401* .. Parameters ..
402 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403 $ mb_, nb_, rsrc_, csrc_, lld_
404 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407 DOUBLE PRECISION ONE
408 parameter( one = 1.0d0 )
409 COMPLEX*16 Z_ONE, Z_NEGONE, Z_ZERO
410 parameter( z_one = 1.0d0, z_negone = -1.0d0,
411 $ z_zero = 0.0d0 )
412 DOUBLE PRECISION ZERO
413 parameter( zero = 0.0d+0 )
414* ..
415*
416*
417* .. Local Scalars ..
418*
419*
420 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422 $ indexa, indexinh, indexinv, inh, inhb, inht,
423 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
424 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
425 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
426 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
427 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
428 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
429 $ pbmin, pbsize, pnb, rowsperproc
430 DOUBLE PRECISION NORM, SAFMAX, SAFMIN
431 COMPLEX*16 ALPHA, BETA, C, CONJTOPH, CONJTOPV,
432 $ oneoverbeta, toph, topnv, toptau, topv
433* ..
434* .. Local Arrays ..
435*
436*
437*
438*
439 INTEGER IDUM1( 1 ), IDUM2( 1 )
440 DOUBLE PRECISION DTMP( 5 )
441 COMPLEX*16 CC( 3 )
442* ..
443* .. External Subroutines ..
444 EXTERNAL blacs_gridinfo, chk1mat, dcombnrm2, dgebr2d,
445 $ dgebs2d, dgsum2d, pchk1mat, pdtreecomb,
446 $ pxerbla, zgebr2d, zgebs2d, zgemm, zgemv,
447 $ zgerv2d, zgesd2d, zgsum2d, zlamov, zscal,
448 $ ztrmvt
449* ..
450* .. External Functions ..
451*
452 LOGICAL LSAME
453 INTEGER ICEIL, NUMROC, PJLAENV
454 DOUBLE PRECISION DZNRM2, PDLAMCH
455 EXTERNAL lsame, iceil, numroc, pjlaenv, dznrm2, pdlamch
456* ..
457* .. Intrinsic Functions ..
458 INTRINSIC dble, dcmplx, dconjg, dimag, ichar, max, min,
459 $ mod, sign, sqrt
460* ..
461*
462*
463* .. Executable Statements ..
464* This is just to keep ftnchek and toolpack/1 happy
465 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
466 $ rsrc_.LT.0 )RETURN
467*
468*
469*
470* Further details
471* ===============
472*
473* At the top of the loop, v and nh have been computed but not
474* spread across. Hence, A is out-of-date even after the
475* rank 2k update. Furthermore, we compute the next v before
476* nh is spread across.
477*
478* I claim that if we used a sum-to-all on NV, by summing CC within
479* each column, that we could compute NV locally and could avoid
480* spreading V across. Bruce claims that sum-to-all can be made
481* to cost no more than sum-to-one on the Paragon. If that is
482* true, this would be a win. But,
483* the BLACS sum-to-all is just a sum-to-one followed by a broadcast,
484* and hence the present scheme is better for now.
485*
486* Get grid parameters
487*
488 ictxt = desca( ctxt_ )
489 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
490*
491 safmax = sqrt( pdlamch( ictxt, 'O' ) ) / n
492 safmin = sqrt( pdlamch( ictxt, 'S' ) )
493*
494* Test the input parameters
495*
496 info = 0
497 IF( nprow.EQ.-1 ) THEN
498 info = -( 600+ctxt_ )
499 ELSE
500*
501* Here we set execution options for PZHETTRD
502*
503 pnb = pjlaenv( ictxt, 2, 'PZHETTRD', 'L', 0, 0, 0, 0 )
504 anb = pjlaenv( ictxt, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
505*
506 interleave = ( pjlaenv( ictxt, 4, 'PZHETTRD', 'L', 1, 0, 0,
507 $ 0 ).EQ.1 )
508 twogemms = ( pjlaenv( ictxt, 4, 'PZHETTRD', 'L', 2, 0, 0,
509 $ 0 ).EQ.1 )
510 balanced = ( pjlaenv( ictxt, 4, 'PZHETTRD', 'L', 3, 0, 0,
511 $ 0 ).EQ.1 )
512*
513 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
514*
515*
516 upper = lsame( uplo, 'U' )
517 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
518 $ info = 600 + nb_
519 IF( info.EQ.0 ) THEN
520*
521*
522* Here is the arithmetic:
523* Let maxnpq = max( np, nq, 2 * ANB )
524* LDV = 4 * max( np, nq ) + 2
525* LWMIN = 2 * ( ANB + 1 ) * LDV + MAX( np, 2 * ANB )
526* = 2 * ( ANB + 1 ) * ( 4 * NPS + 2 ) + NPS
527*
528* This overestimates memory requirements when ANB > NP/2
529* Memory requirements are lower when interleave = .false.
530* Hence, we could have two sets of memory requirements,
531* one for interleave and one for
532*
533*
534 nps = max( numroc( n, 1, 0, 0, nprow ), 2*anb )
535 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
536*
537 work( 1 ) = dcmplx( lwmin )
538 IF( .NOT.lsame( uplo, 'L' ) ) THEN
539 info = -1
540 ELSE IF( ia.NE.1 ) THEN
541 info = -4
542 ELSE IF( ja.NE.1 ) THEN
543 info = -5
544 ELSE IF( nprow.NE.npcol ) THEN
545 info = -( 600+ctxt_ )
546 ELSE IF( desca( dtype_ ).NE.1 ) THEN
547 info = -( 600+dtype_ )
548 ELSE IF( desca( mb_ ).NE.1 ) THEN
549 info = -( 600+mb_ )
550 ELSE IF( desca( nb_ ).NE.1 ) THEN
551 info = -( 600+nb_ )
552 ELSE IF( desca( rsrc_ ).NE.0 ) THEN
553 info = -( 600+rsrc_ )
554 ELSE IF( desca( csrc_ ).NE.0 ) THEN
555 info = -( 600+csrc_ )
556 ELSE IF( lwork.LT.lwmin ) THEN
557 info = -11
558 END IF
559 END IF
560 IF( upper ) THEN
561 idum1( 1 ) = ichar( 'U' )
562 ELSE
563 idum1( 1 ) = ichar( 'L' )
564 END IF
565 idum2( 1 ) = 1
566*
567 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
568 $ info )
569 END IF
570*
571 IF( info.NE.0 ) THEN
572 CALL pxerbla( ictxt, 'PZHETTRD', -info )
573 RETURN
574 END IF
575*
576* Quick return if possible
577*
578 IF( n.EQ.0 )
579 $ RETURN
580*
581*
582*
583* Reduce the lower triangle of sub( A )
584 np = numroc( n, 1, myrow, 0, nprow )
585 nq = numroc( n, 1, mycol, 0, npcol )
586*
587 nxtrow = 0
588 nxtcol = 0
589*
590 liip1 = 1
591 lijp1 = 1
592 npm1 = np
593 nqm1 = nq
594*
595 lda = desca( lld_ )
596 ictxt = desca( ctxt_ )
597*
598*
599*
600* Miscellaneous details:
601* Put tau, D and E in the right places
602* Check signs
603* Place all the arrays in WORK, control their placement
604* in memory.
605*
606*
607*
608* Loop invariants
609* A(LIIP1, LIJ) points to the first element of A(I+1,J)
610* NPM1,NQM1 = the number of rows, cols in A( LII+1:N,LIJ+1:N )
611* A(LII:N,LIJ:N) is one step out of date.
612* proc( CURROW, CURCOL ) owns A(LII,LIJ)
613* proc( NXTROW, CURCOL ) owns A(LIIP1,LIJ)
614*
615 inh = 1
616*
617 IF( interleave ) THEN
618*
619* H and V are interleaved to minimize memory movement
620* LDV has to be twice as large to accomodate interleaving.
621* In addition, LDV is doubled again to allow v, h and
622* toptau to be spreaad across and transposed in a
623* single communication operation with minimum memory
624* movement.
625*
626* We could reduce LDV back to 2*MAX(NPM1,NQM1)
627* by increasing the memory movement required in
628* the spread and transpose of v, h and toptau.
629* However, since the non-interleaved path already
630* provides a mear minimum memory requirement option,
631* we did not provide this additional path.
632*
633 ldv = 4*( max( npm1, nqm1 ) ) + 2
634*
635 inh = 1
636*
637 inv = inh + ldv / 2
638 invt = inh + ( anb+1 )*ldv
639*
640 inht = invt + ldv / 2
641 intmp = invt + ldv*( anb+1 )
642*
643 ELSE
644 ldv = max( npm1, nqm1 )
645*
646 inht = inh + ldv*( anb+1 )
647 inv = inht + ldv*( anb+1 )
648*
649* The code works without this +1, but only because of a
650* coincidence. Without the +1, WORK(INVT) gets trashed, but
651* WORK(INVT) is only used once and when it is used, it is
652* multiplied by WORK( INH ) which is zero. Hence, the fact
653* that WORK(INVT) is trashed has no effect.
654*
655 invt = inv + ldv*( anb+1 ) + 1
656 intmp = invt + ldv*( 2*anb )
657*
658 END IF
659*
660 IF( info.NE.0 ) THEN
661 CALL pxerbla( ictxt, 'PZHETTRD', -info )
662 work( 1 ) = dcmplx( lwmin )
663 RETURN
664 END IF
665*
666*
667* The satisfies the loop invariant: trueA = A - V * HT - H * VT,
668* (where V, H, VT and HT all have BINDEX+1 rows/columns)
669* the first ANB times through the loop.
670*
671*
672*
673* Setting either ( InH and InHT ) or InV to Z_ZERO
674* is adequate except in the face of NaNs.
675*
676*
677 DO 10 i = 1, np
678 work( inh+i-1 ) = z_zero
679 work( inv+i-1 ) = z_zero
680 10 CONTINUE
681 DO 20 i = 1, nq
682 work( inht+i-1 ) = z_zero
683 20 CONTINUE
684*
685*
686*
687 topnv = z_zero
688*
689 ltlip1 = lijp1
690 ltnm1 = npm1
691 IF( mycol.GT.myrow ) THEN
692 ltlip1 = ltlip1 + 1
693 ltnm1 = ltnm1 - 1
694 END IF
695*
696*
697 DO 210 minindex = 1, n - 1, anb
698*
699*
700 maxindex = min( minindex+anb-1, n )
701 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
702 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
703*
704 nqb = nq - lijb + 1
705 npb = np - liib + 1
706 inhtb = inht + lijb - 1
707 invtb = invt + lijb - 1
708 inhb = inh + liib - 1
709 invb = inv + liib - 1
710*
711*
712*
713*
714 DO 160 index = minindex, min( maxindex, n-1 )
715*
716 bindex = index - minindex
717*
718 currow = nxtrow
719 curcol = nxtcol
720*
721 nxtrow = mod( currow+1, nprow )
722 nxtcol = mod( curcol+1, npcol )
723*
724 lii = liip1
725 lij = lijp1
726 npm0 = npm1
727*
728 IF( myrow.EQ.currow ) THEN
729 npm1 = npm1 - 1
730 liip1 = liip1 + 1
731 END IF
732 IF( mycol.EQ.curcol ) THEN
733 nqm1 = nqm1 - 1
734 lijp1 = lijp1 + 1
735 ltlip1 = ltlip1 + 1
736 ltnm1 = ltnm1 - 1
737 END IF
738*
739*
740*
741*
742* V = NV, VT = NVT, H = NH, HT = NHT
743*
744*
745* Update the current column of A
746*
747*
748 IF( mycol.EQ.curcol ) THEN
749*
750 indexa = lii + ( lij-1 )*lda
751 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
752 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
753 conjtoph = dconjg( work( inht+lij-1+bindex*ldv ) )
754 conjtopv = dconjg( topnv )
755*
756 IF( index.GT.1 ) THEN
757 DO 30 i = 0, npm0 - 1
758* A( INDEXA+I ) = A( INDEXA+I )
759 a( indexa+i ) = a( indexa+i ) -
760 $ work( indexinv+ldv+i )*conjtoph -
761 $ work( indexinh+ldv+i )*conjtopv
762 30 CONTINUE
763 END IF
764*
765*
766 END IF
767*
768*
769 IF( mycol.EQ.curcol ) THEN
770*
771* Compute the householder vector
772*
773 IF( myrow.EQ.currow ) THEN
774 dtmp( 2 ) = dble( a( lii+( lij-1 )*lda ) )
775 ELSE
776 dtmp( 2 ) = zero
777 END IF
778 IF( myrow.EQ.nxtrow ) THEN
779 dtmp( 3 ) = dble( a( liip1+( lij-1 )*lda ) )
780 dtmp( 4 ) = dimag( a( liip1+( lij-1 )*lda ) )
781 ELSE
782 dtmp( 3 ) = zero
783 dtmp( 4 ) = zero
784 END IF
785*
786 norm = dznrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
787 dtmp( 1 ) = norm
788*
789* IF DTMP(5) = 1.0, NORM is too large and might cause
790* overflow, hence PDTREECOMB must be called. IF DTMP(5)
791* is zero on output, DTMP(1) can be trusted.
792*
793 dtmp( 5 ) = zero
794 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin ) THEN
795 dtmp( 5 ) = one
796 dtmp( 1 ) = zero
797 END IF
798*
799 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
800 CALL dgsum2d( ictxt, 'C', ' ', 5, 1, dtmp, 5, -1,
801 $ curcol )
802 IF( dtmp( 5 ).EQ.zero ) THEN
803 dtmp( 1 ) = sqrt( dtmp( 1 ) )
804 ELSE
805 dtmp( 1 ) = norm
806 CALL pdtreecomb( ictxt, 'C', 1, dtmp, -1, mycol,
807 $ dcombnrm2 )
808 END IF
809*
810 norm = dtmp( 1 )
811*
812 d( lij ) = dtmp( 2 )
813 IF( myrow.EQ.currow .AND. mycol.EQ.curcol ) THEN
814 a( lii+( lij-1 )*lda ) = dcmplx( d( lij ), zero )
815 END IF
816*
817*
818 alpha = dcmplx( dtmp( 3 ), dtmp( 4 ) )
819*
820 norm = sign( norm, dble( alpha ) )
821*
822 IF( norm.EQ.zero ) THEN
823 toptau = zero
824 ELSE
825 beta = norm + alpha
826 toptau = beta / norm
827 oneoverbeta = 1.0d0 / beta
828*
829 CALL zscal( npm1, oneoverbeta,
830 $ a( liip1+( lij-1 )*lda ), 1 )
831 END IF
832*
833 IF( myrow.EQ.nxtrow ) THEN
834 a( liip1+( lij-1 )*lda ) = z_one
835 END IF
836*
837 tau( lij ) = toptau
838 e( lij ) = -norm
839*
840 END IF
841*
842*
843* Spread v, nh, toptau across
844*
845 DO 40 i = 0, npm1 - 1
846 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
847 $ ( lij-1 )*lda )
848 40 CONTINUE
849*
850 IF( mycol.EQ.curcol ) THEN
851 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
852 CALL zgebs2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
853 $ work( inv+liip1-1+bindex*ldv ),
854 $ npm1+npm1+1 )
855 ELSE
856 CALL zgebr2d( ictxt, 'R', ' ', npm1+npm1+1, 1,
857 $ work( inv+liip1-1+bindex*ldv ),
858 $ npm1+npm1+1, myrow, curcol )
859 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
860 END IF
861 DO 50 i = 0, npm1 - 1
862 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
863 $ 1+bindex*ldv+npm1+i )
864 50 CONTINUE
865*
866 IF( index.LT.n ) THEN
867 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
868 $ a( liip1+( lij-1 )*lda ) = e( lij )
869 END IF
870*
871* Transpose v, nh
872*
873*
874 IF( myrow.EQ.mycol ) THEN
875 DO 60 i = 0, npm1 + npm1
876 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
877 $ bindex*ldv+i )
878 60 CONTINUE
879 ELSE
880 CALL zgesd2d( ictxt, npm1+npm1, 1,
881 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
882 $ mycol, myrow )
883 CALL zgerv2d( ictxt, nqm1+nqm1, 1,
884 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
885 $ mycol, myrow )
886 END IF
887*
888 DO 70 i = 0, nqm1 - 1
889 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
890 $ lijp1-1+bindex*ldv+nqm1+i )
891 70 CONTINUE
892*
893*
894* Update the current block column of A
895*
896 IF( index.GT.1 ) THEN
897 DO 90 j = lijp1, lijb - 1
898 DO 80 i = 0, npm1 - 1
899*
900 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
901 $ - work( inv+liip1-1+bindex*ldv+i )*
902 $ dconjg( work( inht+j-1+bindex*ldv ) ) -
903 $ work( inh+liip1-1+bindex*ldv+i )*
904 $ dconjg( work( invt+j-1+bindex*ldv ) )
905 80 CONTINUE
906 90 CONTINUE
907 END IF
908*
909*
910*
911* Compute NV = A * NHT; NVT = A * NH
912*
913* These two lines are necessary because these elements
914* are not always involved in the calls to ZTRMVT
915* for two reasons:
916* 1) On diagonal processors, the call to TRMVT
917* involves only LTNM1-1 elements
918* 2) On some processes, NQM1 < LTM1 or LIIP1 < LTLIP1
919* and when the results are combined across all processes,
920* uninitialized values may be included.
921 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
922 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
923*
924*
925 IF( myrow.EQ.mycol ) THEN
926 IF( ltnm1.GT.1 ) THEN
927 CALL ztrmvt( 'L', ltnm1-1,
928 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
929 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
930 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
931 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
932 $ ldv ), 1, work( inht+lijp1-1+( bindex+
933 $ 1 )*ldv ), 1 )
934 END IF
935 DO 100 i = 1, ltnm1
936 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
937 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
938 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
939 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
940 100 CONTINUE
941 ELSE
942 IF( ltnm1.GT.0 )
943 $ CALL ztrmvt( 'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
944 $ lda, work( invt+lijp1-1+( bindex+1 )*
945 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
946 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
947 $ ( bindex+1 )*ldv ), 1,
948 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
949 $ 1 )
950*
951 END IF
952*
953*
954* We take advantage of the fact that:
955* A * sum( B ) = sum ( A * B ) for matrices A,B
956*
957* trueA = A + V * HT + H * VT
958* hence: (trueA)v = Av' + V * HT * v + H * VT * v
959* VT * v = sum_p_in_NPROW ( VTp * v )
960* H * VT * v = H * sum (VTp * v) = sum ( H * VTp * v )
961*
962* v = v + V * HT * h + H * VT * h
963*
964*
965*
966* tmp = HT * nh1
967 DO 110 i = 1, 2*( bindex+1 )
968 work( intmp-1+i ) = 0
969 110 CONTINUE
970*
971 IF( balanced ) THEN
972 npset = nprow
973 mysetnum = myrow
974 rowsperproc = iceil( nqb, npset )
975 myfirstrow = min( nqb+1, 1+rowsperproc*mysetnum )
976 numrows = min( rowsperproc, nqb-myfirstrow+1 )
977*
978*
979* tmp = HT * v
980*
981 CALL zgemv( 'C', numrows, bindex+1, z_one,
982 $ work( inhtb+myfirstrow-1 ), ldv,
983 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
984 $ 1, z_zero, work( intmp ), 1 )
985* tmp2 = VT * v
986 CALL zgemv( 'C', numrows, bindex+1, z_one,
987 $ work( invtb+myfirstrow-1 ), ldv,
988 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
989 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
990*
991*
992 CALL zgsum2d( ictxt, 'C', ' ', 2*( bindex+1 ), 1,
993 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
994 ELSE
995* tmp = HT * v
996*
997 CALL zgemv( 'C', nqb, bindex+1, z_one, work( inhtb ),
998 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
999 $ z_zero, work( intmp ), 1 )
1000* tmp2 = VT * v
1001 CALL zgemv( 'C', nqb, bindex+1, z_one, work( invtb ),
1002 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
1003 $ z_zero, work( intmp+bindex+1 ), 1 )
1004*
1005 END IF
1006*
1007*
1008*
1009 IF( balanced ) THEN
1010 mysetnum = mycol
1011*
1012 rowsperproc = iceil( npb, npset )
1013 myfirstrow = min( npb+1, 1+rowsperproc*mysetnum )
1014 numrows = min( rowsperproc, npb-myfirstrow+1 )
1015*
1016 CALL zgsum2d( ictxt, 'R', ' ', 2*( bindex+1 ), 1,
1017 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1018*
1019*
1020* v = v + V * tmp
1021 IF( index.GT.1. ) THEN
1022 CALL zgemv( 'N', numrows, bindex+1, z_negone,
1023 $ work( invb+myfirstrow-1 ), ldv,
1024 $ work( intmp ), 1, z_one,
1025 $ work( invb+myfirstrow-1+( bindex+1 )*
1026 $ ldv ), 1 )
1027*
1028* v = v + H * tmp2
1029 CALL zgemv( 'N', numrows, bindex+1, z_negone,
1030 $ work( inhb+myfirstrow-1 ), ldv,
1031 $ work( intmp+bindex+1 ), 1, z_one,
1032 $ work( invb+myfirstrow-1+( bindex+1 )*
1033 $ ldv ), 1 )
1034 END IF
1035*
1036 ELSE
1037* v = v + V * tmp
1038 CALL zgemv( 'N', npb, bindex+1, z_negone, work( invb ),
1039 $ ldv, work( intmp ), 1, z_one,
1040 $ work( invb+( bindex+1 )*ldv ), 1 )
1041*
1042*
1043* v = v + H * tmp2
1044 CALL zgemv( 'N', npb, bindex+1, z_negone, work( inhb ),
1045 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1046 $ work( invb+( bindex+1 )*ldv ), 1 )
1047*
1048 END IF
1049*
1050*
1051* Transpose NV and add it back into NVT
1052*
1053 IF( myrow.EQ.mycol ) THEN
1054 DO 120 i = 0, nqm1 - 1
1055 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1056 $ i )
1057 120 CONTINUE
1058 ELSE
1059 CALL zgesd2d( ictxt, nqm1, 1,
1060 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1061 $ nqm1, mycol, myrow )
1062 CALL zgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1063 $ myrow )
1064*
1065 END IF
1066 DO 130 i = 0, npm1 - 1
1067 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1068 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1069 130 CONTINUE
1070*
1071* Sum-to-one NV rowwise (within a row)
1072*
1073 CALL zgsum2d( ictxt, 'R', ' ', npm1, 1,
1074 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1075 $ myrow, nxtcol )
1076*
1077*
1078* Dot product c = NV * NH
1079* Sum-to-all c within next processor column
1080*
1081*
1082 IF( mycol.EQ.nxtcol ) THEN
1083 cc( 1 ) = z_zero
1084 DO 140 i = 0, npm1 - 1
1085 cc( 1 ) = cc( 1 ) + dconjg( work( inv+liip1-1+
1086 $ ( bindex+1 )*ldv+i ) )*
1087 $ work( inh+liip1-1+( bindex+1 )*ldv+i )
1088 140 CONTINUE
1089 IF( myrow.EQ.nxtrow ) THEN
1090 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1091 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1092 ELSE
1093 cc( 2 ) = z_zero
1094 cc( 3 ) = z_zero
1095 END IF
1096 CALL zgsum2d( ictxt, 'C', ' ', 3, 1, cc, 3, -1, nxtcol )
1097*
1098 topv = cc( 2 )
1099 c = cc( 1 )
1100 toph = cc( 3 )
1101*
1102 topnv = toptau*( topv-c*dconjg( toptau ) / 2*toph )
1103*
1104*
1105* Compute V = Tau * (V - C * Tau' / 2 * H )
1106*
1107*
1108 DO 150 i = 0, npm1 - 1
1109 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1110 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*
1111 $ dconjg( toptau ) / 2*work( inh+liip1-1+( bindex+
1112 $ 1 )*ldv+i ) )
1113 150 CONTINUE
1114*
1115 END IF
1116*
1117*
1118 160 CONTINUE
1119*
1120*
1121* Perform the rank2k update
1122*
1123 IF( maxindex.LT.n ) THEN
1124*
1125 DO 170 i = 0, npm1 - 1
1126 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1127 170 CONTINUE
1128*
1129*
1130*
1131 IF( .NOT.twogemms ) THEN
1132 IF( interleave ) THEN
1133 ldzg = ldv / 2
1134 ELSE
1135 CALL zlamov( 'A', ltnm1, anb, work( inht+lijp1-1 ),
1136 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1137*
1138 CALL zlamov( 'A', ltnm1, anb, work( inv+ltlip1-1 ),
1139 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1140 ldzg = ldv
1141 END IF
1142 nbzg = anb*2
1143 ELSE
1144 ldzg = ldv
1145 nbzg = anb
1146 END IF
1147*
1148*
1149 DO 180 pbmin = 1, ltnm1, pnb
1150*
1151 pbsize = min( pnb, ltnm1-pbmin+1 )
1152 pbmax = min( ltnm1, pbmin+pnb-1 )
1153 CALL zgemm( 'N', 'C', pbsize, pbmax, nbzg, z_negone,
1154 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1155 $ work( invt+lijp1-1 ), ldzg, z_one,
1156 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1157 IF( twogemms ) THEN
1158 CALL zgemm( 'N', 'C', pbsize, pbmax, anb, z_negone,
1159 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1160 $ work( inht+lijp1-1 ), ldzg, z_one,
1161 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1162 END IF
1163 180 CONTINUE
1164*
1165*
1166*
1167 DO 190 i = 0, npm1 - 1
1168 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1169 work( inh+liip1-1+i ) = work( intmp+i )
1170 190 CONTINUE
1171 DO 200 i = 0, nqm1 - 1
1172 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1173 200 CONTINUE
1174*
1175*
1176 END IF
1177*
1178* End of the update A code
1179*
1180 210 CONTINUE
1181*
1182 IF( mycol.EQ.nxtcol ) THEN
1183 IF( myrow.EQ.nxtrow ) THEN
1184*
1185 d( nq ) = dble( a( np+( nq-1 )*lda ) )
1186 a( np+( nq-1 )*lda ) = d( nq )
1187*
1188 CALL dgebs2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1 )
1189 ELSE
1190 CALL dgebr2d( ictxt, 'C', ' ', 1, 1, d( nq ), 1, nxtrow,
1191 $ nxtcol )
1192 END IF
1193 END IF
1194*
1195*
1196*
1197*
1198 work( 1 ) = dcmplx( lwmin )
1199 RETURN
1200*
1201* End of PZHETTRD
1202*
1203*
1204 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine dcombnrm2(x, y)
Definition pdtreecomb.f:307
subroutine pdtreecomb(ictxt, scope, n, mine, rdest0, cdest0, subptr)
Definition pdtreecomb.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzhettrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pzhettrd.f:3
subroutine ztrmvt(uplo, n, t, ldt, x, incx, y, incy, w, incw, z, incz)
Definition ztrmvt.f:3