ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzhentrd.f
Go to the documentation of this file.
1  SUBROUTINE pzhentrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2  $ LWORK, RWORK, LRWORK, INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * October 15, 1999
8 *
9 * .. Scalar Arguments ..
10  CHARACTER UPLO
11  INTEGER IA, INFO, JA, LRWORK, LWORK, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * )
15  DOUBLE PRECISION D( * ), E( * ), RWORK( * )
16  COMPLEX*16 A( * ), TAU( * ), WORK( * )
17 * ..
18 * Bugs
19 * ====
20 *
21 *
22 * Support for UPLO='U' is limited to calling the old, slow, PZHETRD
23 * code.
24 *
25 *
26 * Purpose
27 *
28 * =======
29 *
30 * PZHENTRD is a prototype version of PZHETRD which uses tailored
31 * codes (either the serial, ZHETRD, or the parallel code, PZHETTRD)
32 * when the workspace provided by the user is adequate.
33 *
34 *
35 * PZHENTRD reduces a complex Hermitian matrix sub( A ) to Hermitian
36 * tridiagonal form T by an unitary similarity transformation:
37 * Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
38 *
39 * Features
40 * ========
41 *
42 * PZHENTRD is faster than PZHETRD on almost all matrices,
43 * particularly small ones (i.e. N < 500 * sqrt(P) ), provided that
44 * enough workspace is available to use the tailored codes.
45 *
46 * The tailored codes provide performance that is essentially
47 * independent of the input data layout.
48 *
49 * The tailored codes place no restrictions on IA, JA, MB or NB.
50 * At present, IA, JA, MB and NB are restricted to those values allowed
51 * by PZHETRD to keep the interface simple. These restrictions are
52 * documented below. (Search for "restrictions".)
53 *
54 * Notes
55 * =====
56 *
57 *
58 * Each global data object is described by an associated description
59 * vector. This vector stores the information required to establish
60 * the mapping between an object element and its corresponding process
61 * and memory location.
62 *
63 * Let A be a generic term for any 2D block cyclicly distributed array.
64 * Such a global array has an associated description vector DESCA.
65 * In the following comments, the character _ should be read as
66 * "of the global array".
67 *
68 * NOTATION STORED IN EXPLANATION
69 * --------------- -------------- --------------------------------------
70 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
71 * DTYPE_A = 1.
72 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
73 * the BLACS process grid A is distribu-
74 * ted over. The context itself is glo-
75 * bal, but the handle (the integer
76 * value) may vary.
77 * M_A (global) DESCA( M_ ) The number of rows in the global
78 * array A.
79 * N_A (global) DESCA( N_ ) The number of columns in the global
80 * array A.
81 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
82 * the rows of the array.
83 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
84 * the columns of the array.
85 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
86 * row of the array A is distributed.
87 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
88 * first column of the array A is
89 * distributed.
90 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
91 * array. LLD_A >= MAX(1,LOCr(M_A)).
92 *
93 * Let K be the number of rows or columns of a distributed matrix,
94 * and assume that its process grid has dimension p x q.
95 * LOCr( K ) denotes the number of elements of K that a process
96 * would receive if K were distributed over the p processes of its
97 * process column.
98 * Similarly, LOCc( K ) denotes the number of elements of K that a
99 * process would receive if K were distributed over the q processes of
100 * its process row.
101 * The values of LOCr() and LOCc() may be determined via a call to the
102 * ScaLAPACK tool function, NUMROC:
103 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
104 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
105 * An upper bound for these quantities may be computed by:
106 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
107 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
108 *
109 *
110 * Arguments
111 * =========
112 *
113 * UPLO (global input) CHARACTER
114 * Specifies whether the upper or lower triangular part of the
115 * Hermitian matrix sub( A ) is stored:
116 * = 'U': Upper triangular
117 * = 'L': Lower triangular
118 *
119 * N (global input) INTEGER
120 * The number of rows and columns to be operated on, i.e. the
121 * order of the distributed submatrix sub( A ). N >= 0.
122 *
123 * A (local input/local output) COMPLEX*16 pointer into the
124 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
125 * On entry, this array contains the local pieces of the
126 * Hermitian distributed matrix sub( A ). If UPLO = 'U', the
127 * leading N-by-N upper triangular part of sub( A ) contains
128 * the upper triangular part of the matrix, and its strictly
129 * lower triangular part is not referenced. If UPLO = 'L', the
130 * leading N-by-N lower triangular part of sub( A ) contains the
131 * lower triangular part of the matrix, and its strictly upper
132 * triangular part is not referenced. On exit, if UPLO = 'U',
133 * the diagonal and first superdiagonal of sub( A ) are over-
134 * written by the corresponding elements of the tridiagonal
135 * matrix T, and the elements above the first superdiagonal,
136 * with the array TAU, represent the unitary matrix Q as a
137 * product of elementary reflectors; if UPLO = 'L', the diagonal
138 * and first subdiagonal of sub( A ) are overwritten by the
139 * corresponding elements of the tridiagonal matrix T, and the
140 * elements below the first subdiagonal, with the array TAU,
141 * represent the unitary matrix Q as a product of elementary
142 * reflectors. See Further Details.
143 *
144 * IA (global input) INTEGER
145 * The row index in the global array A indicating the first
146 * row of sub( A ).
147 *
148 * JA (global input) INTEGER
149 * The column index in the global array A indicating the
150 * first column of sub( A ).
151 *
152 * DESCA (global and local input) INTEGER array of dimension DLEN_.
153 * The array descriptor for the distributed matrix A.
154 *
155 * D (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1)
156 * The diagonal elements of the tridiagonal matrix T:
157 * D(i) = A(i,i). D is tied to the distributed matrix A.
158 *
159 * E (local output) DOUBLE PRECISION array, dimension LOCc(JA+N-1)
160 * if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal
161 * elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
162 * UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
163 * distributed matrix A.
164 *
165 * TAU (local output) COMPLEX*16, array, dimension
166 * LOCc(JA+N-1). This array contains the scalar factors TAU of
167 * the elementary reflectors. TAU is tied to the distributed
168 * matrix A.
169 *
170 * WORK (local workspace/local output) COMPLEX*16 array,
171 * dimension (LWORK)
172 * On exit, WORK( 1 ) returns the optimal LWORK.
173 *
174 * LWORK (local or global input) INTEGER
175 * The dimension of the array WORK.
176 * LWORK is local input and must be at least
177 * LWORK >= MAX( NB * ( NP +1 ), 3 * NB )
178 *
179 * For optimal performance, greater workspace is needed, i.e.
180 * LWORK >= 2*( ANB+1 )*( 4*NPS+2 ) + ( NPS + 4 ) * NPS
181 * ICTXT = DESCA( CTXT_ )
182 * ANB = PJLAENV( ICTXT, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
183 * SQNPC = INT( SQRT( DBLE( NPROW * NPCOL ) ) )
184 * NPS = MAX( NUMROC( N, 1, 0, 0, SQNPC ), 2*ANB )
185 *
186 * NUMROC is a ScaLAPACK tool functions;
187 * PJLAENV is a ScaLAPACK envionmental inquiry function
188 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
189 * the subroutine BLACS_GRIDINFO.
190 *
191 *
192 * RWORK (local workspace/local output) COMPLEX*16 array,
193 * dimension (LRWORK)
194 * On exit, RWORK( 1 ) returns the optimal LRWORK.
195 *
196 * LRWORK (local or global input) INTEGER
197 * The dimension of the array RWORK.
198 * LRWORK is local input and must be at least
199 * LRWORK >= 1
200 *
201 * For optimal performance, greater workspace is needed, i.e.
202 * LRWORK >= MAX( 2 * N )
203 *
204 *
205 * INFO (global output) INTEGER
206 * = 0: successful exit
207 * < 0: If the i-th argument is an array and the j-entry had
208 * an illegal value, then INFO = -(i*100+j), if the i-th
209 * argument is a scalar and had an illegal value, then
210 * INFO = -i.
211 *
212 * Further Details
213 * ===============
214 *
215 * If UPLO = 'U', the matrix Q is represented as a product of elementary
216 * reflectors
217 *
218 * Q = H(n-1) . . . H(2) H(1).
219 *
220 * Each H(i) has the form
221 *
222 * H(i) = I - tau * v * v'
223 *
224 * where tau is a complex scalar, and v is a complex vector with
225 * v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
226 * A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
227 *
228 * If UPLO = 'L', the matrix Q is represented as a product of elementary
229 * reflectors
230 *
231 * Q = H(1) H(2) . . . H(n-1).
232 *
233 * Each H(i) has the form
234 *
235 * H(i) = I - tau * v * v'
236 *
237 * where tau is a complex scalar, and v is a complex vector with
238 * v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
239 * A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
240 *
241 * The contents of sub( A ) on exit are illustrated by the following
242 * examples with n = 5:
243 *
244 * if UPLO = 'U': if UPLO = 'L':
245 *
246 * ( d e v2 v3 v4 ) ( d )
247 * ( d e v3 v4 ) ( e d )
248 * ( d e v4 ) ( v1 e d )
249 * ( d e ) ( v1 v2 e d )
250 * ( d ) ( v1 v2 v3 e d )
251 *
252 * where d and e denote diagonal and off-diagonal elements of T, and vi
253 * denotes an element of the vector defining H(i).
254 *
255 * Alignment requirements
256 * ======================
257 *
258 * The distributed submatrix sub( A ) must verify some alignment proper-
259 * ties, namely the following expression should be true:
260 * ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with
261 * IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
262 *
263 * =====================================================================
264 *
265 * .. Parameters ..
266  INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
267  $ mb_, nb_, rsrc_, csrc_, lld_
268  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
269  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
270  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
271  DOUBLE PRECISION ONE
272  parameter( one = 1.0d+0 )
273  COMPLEX*16 CONE
274  parameter( cone = ( 1.0d+0, 0.0d+0 ) )
275 * ..
276 * .. Local Scalars ..
277  LOGICAL LQUERY, UPPER
278  CHARACTER COLCTOP, ROWCTOP
279  INTEGER ANB, CTXTB, I, IACOL, IAROW, ICOFFA, ICTXT,
280  $ iinfo, indb, indrd, indre, indtau, indw, ipw,
281  $ iroffa, j, jb, jx, k, kk, llrwork, llwork,
282  $ lrwmin, lwmin, minsz, mycol, mycolb, myrow,
283  $ myrowb, nb, np, npcol, npcolb, nprow, nprowb,
284  $ nps, nq, onepmin, oneprmin, sqnpc, ttlrwmin,
285  $ ttlwmin
286 * ..
287 * .. Local Arrays ..
288  INTEGER DESCB( DLEN_ ), DESCW( DLEN_ ), IDUM1( 3 ),
289  $ idum2( 3 )
290 * ..
291 * .. External Subroutines ..
292  EXTERNAL blacs_get, blacs_gridexit, blacs_gridinfo,
293  $ blacs_gridinit, chk1mat, descset, igamn2d,
294  $ pchk1mat, pdlamr1d, pb_topget, pb_topset,
295  $ pxerbla, pzelset, pzher2k, pzhetd2, pzhettrd,
296  $ pzlamr1d, pzlatrd, pztrmr2d, zhetrd
297 * ..
298 * .. External Functions ..
299  LOGICAL LSAME
300  INTEGER INDXG2L, INDXG2P, NUMROC, PJLAENV
301  EXTERNAL lsame, indxg2l, indxg2p, numroc, pjlaenv
302 * ..
303 * .. Intrinsic Functions ..
304  INTRINSIC dble, dcmplx, ichar, int, max, min, mod, sqrt
305 * ..
306 * .. Executable Statements ..
307 *
308 * This is just to keep ftnchek and toolpack/1 happy
309  IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
310  $ rsrc_.LT.0 )RETURN
311 * Get grid parameters
312 *
313  ictxt = desca( ctxt_ )
314  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
315 *
316 * Test the input parameters
317 *
318  info = 0
319  IF( nprow.EQ.-1 ) THEN
320  info = -( 600+ctxt_ )
321  ELSE
322  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
323  upper = lsame( uplo, 'U' )
324  IF( info.EQ.0 ) THEN
325  nb = desca( nb_ )
326  iroffa = mod( ia-1, desca( mb_ ) )
327  icoffa = mod( ja-1, desca( nb_ ) )
328  iarow = indxg2p( ia, nb, myrow, desca( rsrc_ ), nprow )
329  iacol = indxg2p( ja, nb, mycol, desca( csrc_ ), npcol )
330  np = numroc( n, nb, myrow, iarow, nprow )
331  nq = max( 1, numroc( n+ja-1, nb, mycol, desca( csrc_ ),
332  $ npcol ) )
333  lwmin = max( ( np+1 )*nb, 3*nb )
334  anb = pjlaenv( ictxt, 3, 'PZHETTRD', 'L', 0, 0, 0, 0 )
335  minsz = pjlaenv( ictxt, 5, 'PZHETTRD', 'L', 0, 0, 0, 0 )
336  sqnpc = int( sqrt( dble( nprow*npcol ) ) )
337  nps = max( numroc( n, 1, 0, 0, sqnpc ), 2*anb )
338  ttlwmin = 2*( anb+1 )*( 4*nps+2 ) + ( nps+2 )*nps
339  lrwmin = 1
340  ttlrwmin = 2*nps
341 *
342  work( 1 ) = dcmplx( dble( ttlwmin ) )
343  rwork( 1 ) = dble( ttlrwmin )
344  lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 )
345  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
346  info = -1
347 *
348 * The following two restrictions are not necessary provided
349 * that either of the tailored codes are used.
350 *
351  ELSE IF( iroffa.NE.icoffa .OR. icoffa.NE.0 ) THEN
352  info = -5
353  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
354  info = -( 600+nb_ )
355  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
356  info = -11
357  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
358  info = -13
359  END IF
360  END IF
361  IF( upper ) THEN
362  idum1( 1 ) = ichar( 'U' )
363  ELSE
364  idum1( 1 ) = ichar( 'L' )
365  END IF
366  idum2( 1 ) = 1
367  IF( lwork.EQ.-1 ) THEN
368  idum1( 2 ) = -1
369  ELSE
370  idum1( 2 ) = 1
371  END IF
372  idum2( 2 ) = 11
373  IF( lrwork.EQ.-1 ) THEN
374  idum1( 3 ) = -1
375  ELSE
376  idum1( 3 ) = 1
377  END IF
378  idum2( 3 ) = 13
379  CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 3, idum1, idum2,
380  $ info )
381  END IF
382 *
383  IF( info.NE.0 ) THEN
384  CALL pxerbla( ictxt, 'PZHENTRD', -info )
385  RETURN
386  ELSE IF( lquery ) THEN
387  RETURN
388  END IF
389 *
390 * Quick return if possible
391 *
392  IF( n.EQ.0 )
393  $ RETURN
394 *
395 *
396  onepmin = n*n + 3*n + 1
397  llwork = lwork
398  CALL igamn2d( ictxt, 'A', ' ', 1, 1, llwork, 1, 1, -1, -1, -1,
399  $ -1 )
400 *
401  oneprmin = 2*n
402  llrwork = lrwork
403  CALL igamn2d( ictxt, 'A', ' ', 1, 1, llrwork, 1, 1, -1, -1, -1,
404  $ -1 )
405 *
406 *
407 * Use the serial, LAPACK, code: ZTRD on small matrices if we
408 * we have enough space.
409 *
410  nprowb = 0
411  IF( ( n.LT.minsz .OR. sqnpc.EQ.1 ) .AND. llwork.GE.onepmin .AND.
412  $ llrwork.GE.oneprmin .AND. .NOT.upper ) THEN
413  nprowb = 1
414  nps = n
415  ELSE
416  IF( llwork.GE.ttlwmin .AND. llrwork.GE.ttlrwmin .AND. .NOT.
417  $ upper ) THEN
418  nprowb = sqnpc
419  END IF
420  END IF
421 *
422  IF( nprowb.GE.1 ) THEN
423  npcolb = nprowb
424  sqnpc = nprowb
425  indb = 1
426  indrd = 1
427  indre = indrd + nps
428  indtau = indb + nps*nps
429  indw = indtau + nps
430  llwork = llwork - indw + 1
431 *
432  CALL blacs_get( ictxt, 10, ctxtb )
433  CALL blacs_gridinit( ctxtb, 'Row major', sqnpc, sqnpc )
434  CALL blacs_gridinfo( ctxtb, nprowb, npcolb, myrowb, mycolb )
435  CALL descset( descb, n, n, 1, 1, 0, 0, ctxtb, nps )
436 *
437  CALL pztrmr2d( uplo, 'N', n, n, a, ia, ja, desca, work( indb ),
438  $ 1, 1, descb, ictxt )
439 *
440 *
441 * Only those processors in context CTXTB are needed for a while
442 *
443  IF( nprowb.GT.0 ) THEN
444 *
445  IF( nprowb.EQ.1 ) THEN
446  CALL zhetrd( uplo, n, work( indb ), nps, rwork( indrd ),
447  $ rwork( indre ), work( indtau ),
448  $ work( indw ), llwork, info )
449  ELSE
450 *
451  CALL pzhettrd( 'L', n, work( indb ), 1, 1, descb,
452  $ rwork( indrd ), rwork( indre ),
453  $ work( indtau ), work( indw ), llwork,
454  $ info )
455 *
456  END IF
457  END IF
458 *
459 * All processors participate in moving the data back to the
460 * way that PZHENTRD expects it.
461 *
462  CALL pdlamr1d( n-1, rwork( indre ), 1, 1, descb, e, 1, ja,
463  $ desca )
464 *
465  CALL pdlamr1d( n, rwork( indrd ), 1, 1, descb, d, 1, ja,
466  $ desca )
467 *
468  CALL pzlamr1d( n, work( indtau ), 1, 1, descb, tau, 1, ja,
469  $ desca )
470 *
471  CALL pztrmr2d( uplo, 'N', n, n, work( indb ), 1, 1, descb, a,
472  $ ia, ja, desca, ictxt )
473 *
474  IF( myrowb.GE.0 )
475  $ CALL blacs_gridexit( ctxtb )
476 *
477  ELSE
478 *
479  CALL pb_topget( ictxt, 'Combine', 'Columnwise', colctop )
480  CALL pb_topget( ictxt, 'Combine', 'Rowwise', rowctop )
481  CALL pb_topset( ictxt, 'Combine', 'Columnwise', '1-tree' )
482  CALL pb_topset( ictxt, 'Combine', 'Rowwise', '1-tree' )
483 *
484  ipw = np*nb + 1
485 *
486  IF( upper ) THEN
487 *
488 * Reduce the upper triangle of sub( A ).
489 *
490  kk = mod( ja+n-1, nb )
491  IF( kk.EQ.0 )
492  $ kk = nb
493  CALL descset( descw, n, nb, nb, nb, iarow,
494  $ indxg2p( ja+n-kk, nb, mycol, desca( csrc_ ),
495  $ npcol ), ictxt, max( 1, np ) )
496 *
497  DO 10 k = n - kk + 1, nb + 1, -nb
498  jb = min( n-k+1, nb )
499  i = ia + k - 1
500  j = ja + k - 1
501 *
502 * Reduce columns I:I+NB-1 to tridiagonal form and form
503 * the matrix W which is needed to update the unreduced part of
504 * the matrix
505 *
506  CALL pzlatrd( uplo, k+jb-1, jb, a, ia, ja, desca, d, e,
507  $ tau, work, 1, 1, descw, work( ipw ) )
508 *
509 * Update the unreduced submatrix A(IA:I-1,JA:J-1), using an
510 * update of the form:
511 * A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V'
512 *
513  CALL pzher2k( uplo, 'No transpose', k-1, jb, -cone, a,
514  $ ia, j, desca, work, 1, 1, descw, one, a,
515  $ ia, ja, desca )
516 *
517 * Copy last superdiagonal element back into sub( A )
518 *
519  jx = min( indxg2l( j, nb, 0, iacol, npcol ), nq )
520  CALL pzelset( a, i-1, j, desca, dcmplx( e( jx ) ) )
521 *
522  descw( csrc_ ) = mod( descw( csrc_ )+npcol-1, npcol )
523 *
524  10 CONTINUE
525 *
526 * Use unblocked code to reduce the last or only block
527 *
528  CALL pzhetd2( uplo, min( n, nb ), a, ia, ja, desca, d, e,
529  $ tau, work, lwork, iinfo )
530 *
531  ELSE
532 *
533 * Reduce the lower triangle of sub( A )
534 *
535  kk = mod( ja+n-1, nb )
536  IF( kk.EQ.0 )
537  $ kk = nb
538  CALL descset( descw, n, nb, nb, nb, iarow, iacol, ictxt,
539  $ max( 1, np ) )
540 *
541  DO 20 k = 1, n - nb, nb
542  i = ia + k - 1
543  j = ja + k - 1
544 *
545 * Reduce columns I:I+NB-1 to tridiagonal form and form
546 * the matrix W which is needed to update the unreduced part
547 * of the matrix
548 *
549  CALL pzlatrd( uplo, n-k+1, nb, a, i, j, desca, d, e, tau,
550  $ work, k, 1, descw, work( ipw ) )
551 *
552 * Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1),
553 * using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) :=
554 * A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V'
555 *
556  CALL pzher2k( uplo, 'No transpose', n-k-nb+1, nb, -cone,
557  $ a, i+nb, j, desca, work, k+nb, 1, descw,
558  $ one, a, i+nb, j+nb, desca )
559 *
560 * Copy last subdiagonal element back into sub( A )
561 *
562  jx = min( indxg2l( j+nb-1, nb, 0, iacol, npcol ), nq )
563  CALL pzelset( a, i+nb, j+nb-1, desca, dcmplx( e( jx ) ) )
564 *
565  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
566 *
567  20 CONTINUE
568 *
569 * Use unblocked code to reduce the last or only block
570 *
571  CALL pzhetd2( uplo, kk, a, ia+k-1, ja+k-1, desca, d, e, tau,
572  $ work, lwork, iinfo )
573  END IF
574 *
575  CALL pb_topset( ictxt, 'Combine', 'Columnwise', colctop )
576  CALL pb_topset( ictxt, 'Combine', 'Rowwise', rowctop )
577 *
578  END IF
579 *
580  work( 1 ) = dcmplx( dble( ttlwmin ) )
581  rwork( 1 ) = dble( ttlrwmin )
582 *
583  RETURN
584 *
585 * End of PZHENTRD
586 *
587  END
pzlamr1d
subroutine pzlamr1d(N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pzlamr1d.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
pzlatrd
subroutine pzlatrd(UPLO, N, NB, A, IA, JA, DESCA, D, E, TAU, W, IW, JW, DESCW, WORK)
Definition: pzlatrd.f:3
pzhettrd
subroutine pzhettrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pzhettrd.f:3
pzhetd2
subroutine pzhetd2(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, INFO)
Definition: pzhetd2.f:3
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pdlamr1d
subroutine pdlamr1d(N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlamr1d.f:2
pzhentrd
subroutine pzhentrd(UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pzhentrd.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pzelset
subroutine pzelset(A, IA, JA, DESCA, ALPHA)
Definition: pzelset.f:2
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181