SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pchetrd.f
Go to the documentation of this file.
1 SUBROUTINE pchetrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
2 $ LWORK, INFO )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 CHARACTER UPLO
11 INTEGER IA, INFO, JA, LWORK, N
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * )
15 REAL D( * ), E( * )
16 COMPLEX A( * ), TAU( * ), WORK( * )
17* ..
18*
19* Purpose
20* =======
21*
22* PCHETRD reduces a complex Hermitian matrix sub( A ) to Hermitian
23* tridiagonal form T by an unitary similarity transformation:
24* Q' * sub( A ) * Q = T, where sub( A ) = A(IA:IA+N-1,JA:JA+N-1).
25*
26* Notes
27* =====
28*
29* Each global data object is described by an associated description
30* vector. This vector stores the information required to establish
31* the mapping between an object element and its corresponding process
32* and memory location.
33*
34* Let A be a generic term for any 2D block cyclicly distributed array.
35* Such a global array has an associated description vector DESCA.
36* In the following comments, the character _ should be read as
37* "of the global array".
38*
39* NOTATION STORED IN EXPLANATION
40* --------------- -------------- --------------------------------------
41* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42* DTYPE_A = 1.
43* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
44* the BLACS process grid A is distribu-
45* ted over. The context itself is glo-
46* bal, but the handle (the integer
47* value) may vary.
48* M_A (global) DESCA( M_ ) The number of rows in the global
49* array A.
50* N_A (global) DESCA( N_ ) The number of columns in the global
51* array A.
52* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
53* the rows of the array.
54* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
55* the columns of the array.
56* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
57* row of the array A is distributed.
58* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59* first column of the array A is
60* distributed.
61* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62* array. LLD_A >= MAX(1,LOCr(M_A)).
63*
64* Let K be the number of rows or columns of a distributed matrix,
65* and assume that its process grid has dimension p x q.
66* LOCr( K ) denotes the number of elements of K that a process
67* would receive if K were distributed over the p processes of its
68* process column.
69* Similarly, LOCc( K ) denotes the number of elements of K that a
70* process would receive if K were distributed over the q processes of
71* its process row.
72* The values of LOCr() and LOCc() may be determined via a call to the
73* ScaLAPACK tool function, NUMROC:
74* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76* An upper bound for these quantities may be computed by:
77* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79*
80* Arguments
81* =========
82*
83* UPLO (global input) CHARACTER
84* Specifies whether the upper or lower triangular part of the
85* Hermitian matrix sub( A ) is stored:
86* = 'U': Upper triangular
87* = 'L': Lower triangular
88*
89* N (global input) INTEGER
90* The number of rows and columns to be operated on, i.e. the
91* order of the distributed submatrix sub( A ). N >= 0.
92*
93* A (local input/local output) COMPLEX pointer into the
94* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
95* On entry, this array contains the local pieces of the
96* Hermitian distributed matrix sub( A ). If UPLO = 'U', the
97* leading N-by-N upper triangular part of sub( A ) contains
98* the upper triangular part of the matrix, and its strictly
99* lower triangular part is not referenced. If UPLO = 'L', the
100* leading N-by-N lower triangular part of sub( A ) contains the
101* lower triangular part of the matrix, and its strictly upper
102* triangular part is not referenced. On exit, if UPLO = 'U',
103* the diagonal and first superdiagonal of sub( A ) are over-
104* written by the corresponding elements of the tridiagonal
105* matrix T, and the elements above the first superdiagonal,
106* with the array TAU, represent the unitary matrix Q as a
107* product of elementary reflectors; if UPLO = 'L', the diagonal
108* and first subdiagonal of sub( A ) are overwritten by the
109* corresponding elements of the tridiagonal matrix T, and the
110* elements below the first subdiagonal, with the array TAU,
111* represent the unitary matrix Q as a product of elementary
112* reflectors. See Further Details.
113*
114* IA (global input) INTEGER
115* The row index in the global array A indicating the first
116* row of sub( A ).
117*
118* JA (global input) INTEGER
119* The column index in the global array A indicating the
120* first column of sub( A ).
121*
122* DESCA (global and local input) INTEGER array of dimension DLEN_.
123* The array descriptor for the distributed matrix A.
124*
125* D (local output) REAL array, dimension LOCc(JA+N-1)
126* The diagonal elements of the tridiagonal matrix T:
127* D(i) = A(i,i). D is tied to the distributed matrix A.
128*
129* E (local output) REAL array, dimension LOCc(JA+N-1)
130* if UPLO = 'U', LOCc(JA+N-2) otherwise. The off-diagonal
131* elements of the tridiagonal matrix T: E(i) = A(i,i+1) if
132* UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. E is tied to the
133* distributed matrix A.
134*
135* TAU (local output) COMPLEX, array, dimension
136* LOCc(JA+N-1). This array contains the scalar factors TAU of
137* the elementary reflectors. TAU is tied to the distributed
138* matrix A.
139*
140* WORK (local workspace/local output) COMPLEX array,
141* dimension (LWORK)
142* On exit, WORK( 1 ) returns the minimal and optimal LWORK.
143*
144* LWORK (local or global input) INTEGER
145* The dimension of the array WORK.
146* LWORK is local input and must be at least
147* LWORK >= MAX( NB * ( NP +1 ), 3 * NB )
148*
149* where NB = MB_A = NB_A,
150* NP = NUMROC( N, NB, MYROW, IAROW, NPROW ),
151* IAROW = INDXG2P( IA, NB, MYROW, RSRC_A, NPROW ).
152*
153* INDXG2P and NUMROC are ScaLAPACK tool functions;
154* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
155* the subroutine BLACS_GRIDINFO.
156*
157* If LWORK = -1, then LWORK is global input and a workspace
158* query is assumed; the routine only calculates the minimum
159* and optimal size for all work arrays. Each of these
160* values is returned in the first entry of the corresponding
161* work array, and no error message is issued by PXERBLA.
162*
163* INFO (global output) INTEGER
164* = 0: successful exit
165* < 0: If the i-th argument is an array and the j-entry had
166* an illegal value, then INFO = -(i*100+j), if the i-th
167* argument is a scalar and had an illegal value, then
168* INFO = -i.
169*
170* Further Details
171* ===============
172*
173* If UPLO = 'U', the matrix Q is represented as a product of elementary
174* reflectors
175*
176* Q = H(n-1) . . . H(2) H(1).
177*
178* Each H(i) has the form
179*
180* H(i) = I - tau * v * v'
181*
182* where tau is a complex scalar, and v is a complex vector with
183* v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
184* A(ia:ia+i-2,ja+i), and tau in TAU(ja+i-1).
185*
186* If UPLO = 'L', the matrix Q is represented as a product of elementary
187* reflectors
188*
189* Q = H(1) H(2) . . . H(n-1).
190*
191* Each H(i) has the form
192*
193* H(i) = I - tau * v * v'
194*
195* where tau is a complex scalar, and v is a complex vector with
196* v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in
197* A(ia+i+1:ia+n-1,ja+i-1), and tau in TAU(ja+i-1).
198*
199* The contents of sub( A ) on exit are illustrated by the following
200* examples with n = 5:
201*
202* if UPLO = 'U': if UPLO = 'L':
203*
204* ( d e v2 v3 v4 ) ( d )
205* ( d e v3 v4 ) ( e d )
206* ( d e v4 ) ( v1 e d )
207* ( d e ) ( v1 v2 e d )
208* ( d ) ( v1 v2 v3 e d )
209*
210* where d and e denote diagonal and off-diagonal elements of T, and vi
211* denotes an element of the vector defining H(i).
212*
213* Alignment requirements
214* ======================
215*
216* The distributed submatrix sub( A ) must verify some alignment proper-
217* ties, namely the following expression should be true:
218* ( MB_A.EQ.NB_A .AND. IROFFA.EQ.ICOFFA .AND. IROFFA.EQ.0 ) with
219* IROFFA = MOD( IA-1, MB_A ) and ICOFFA = MOD( JA-1, NB_A ).
220*
221* =====================================================================
222*
223* .. Parameters ..
224 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
225 $ lld_, mb_, m_, nb_, n_, rsrc_
226 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
227 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
228 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
229 REAL ONE
230 parameter( one = 1.0e+0 )
231 COMPLEX CONE
232 parameter( cone = ( 1.0e+0, 0.0e+0 ) )
233* ..
234* .. Local Scalars ..
235 LOGICAL LQUERY, UPPER
236 CHARACTER COLCTOP, ROWCTOP
237 INTEGER I, IACOL, IAROW, ICOFFA, ICTXT, IINFO, IPW,
238 $ iroffa, j, jb, jx, k, kk, lwmin, mycol, myrow,
239 $ nb, np, npcol, nprow, nq
240* ..
241* .. Local Arrays ..
242 INTEGER DESCW( DLEN_ ), IDUM1( 2 ), IDUM2( 2 )
243* ..
244* .. External Subroutines ..
245 EXTERNAL blacs_gridinfo, chk1mat, descset, pcher2k,
246 $ pchetd2, pchk1mat, pclatrd, pb_topget,
247 $ pb_topset, pxerbla
248* ..
249* .. External Functions ..
250 LOGICAL LSAME
251 INTEGER INDXG2L, INDXG2P, NUMROC
252 EXTERNAL lsame, indxg2l, indxg2p, numroc
253* ..
254* .. Intrinsic Functions ..
255 INTRINSIC cmplx, ichar, max, min, mod, real
256* ..
257* .. Executable Statements ..
258*
259* Get grid parameters
260*
261 ictxt = desca( ctxt_ )
262 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
263*
264* Test the input parameters
265*
266 info = 0
267 IF( nprow.EQ.-1 ) THEN
268 info = -(600+ctxt_)
269 ELSE
270 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
271 upper = lsame( uplo, 'U' )
272 IF( info.EQ.0 ) THEN
273 nb = desca( nb_ )
274 iroffa = mod( ia-1, desca( mb_ ) )
275 icoffa = mod( ja-1, desca( nb_ ) )
276 iarow = indxg2p( ia, nb, myrow, desca( rsrc_ ), nprow )
277 iacol = indxg2p( ja, nb, mycol, desca( csrc_ ), npcol )
278 np = numroc( n, nb, myrow, iarow, nprow )
279 nq = max( 1, numroc( n+ja-1, nb, mycol, desca( csrc_ ),
280 $ npcol ) )
281 lwmin = max( (np+1)*nb, 3*nb )
282*
283 work( 1 ) = cmplx( real( lwmin ) )
284 lquery = ( lwork.EQ.-1 )
285 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
286 info = -1
287 ELSE IF( iroffa.NE.icoffa .OR. icoffa.NE.0 ) THEN
288 info = -5
289 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
290 info = -(600+nb_)
291 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
292 info = -11
293 END IF
294 END IF
295 IF( upper ) THEN
296 idum1( 1 ) = ichar( 'U' )
297 ELSE
298 idum1( 1 ) = ichar( 'L' )
299 END IF
300 idum2( 1 ) = 1
301 IF( lwork.EQ.-1 ) THEN
302 idum1( 2 ) = -1
303 ELSE
304 idum1( 2 ) = 1
305 END IF
306 idum2( 2 ) = 11
307 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 2, idum1, idum2,
308 $ info )
309 END IF
310*
311 IF( info.NE.0 ) THEN
312 CALL pxerbla( ictxt, 'PCHETRD', -info )
313 RETURN
314 ELSE IF( lquery ) THEN
315 RETURN
316 END IF
317*
318* Quick return if possible
319*
320 IF( n.EQ.0 )
321 $ RETURN
322*
323 CALL pb_topget( ictxt, 'Combine', 'Columnwise', colctop )
324 CALL pb_topget( ictxt, 'Combine', 'Rowwise', rowctop )
325 CALL pb_topset( ictxt, 'Combine', 'Columnwise', '1-tree' )
326 CALL pb_topset( ictxt, 'Combine', 'Rowwise', '1-tree' )
327*
328 ipw = np * nb + 1
329*
330 IF( upper ) THEN
331*
332* Reduce the upper triangle of sub( A ).
333*
334 kk = mod( ja+n-1, nb )
335 IF( kk.EQ.0 )
336 $ kk = nb
337 CALL descset( descw, n, nb, nb, nb, iarow, indxg2p( ja+n-kk,
338 $ nb, mycol, desca( csrc_ ), npcol ), ictxt,
339 $ max( 1, np ) )
340*
341 DO 10 k = n-kk+1, nb+1, -nb
342 jb = min( n-k+1, nb )
343 i = ia + k - 1
344 j = ja + k - 1
345*
346* Reduce columns I:I+NB-1 to tridiagonal form and form
347* the matrix W which is needed to update the unreduced part of
348* the matrix
349*
350 CALL pclatrd( uplo, k+jb-1, jb, a, ia, ja, desca, d, e, tau,
351 $ work, 1, 1, descw, work( ipw ) )
352*
353* Update the unreduced submatrix A(IA:I-1,JA:J-1), using an
354* update of the form:
355* A(IA:I-1,JA:J-1) := A(IA:I-1,JA:J-1) - V*W' - W*V'
356*
357 CALL pcher2k( uplo, 'No transpose', k-1, jb, -cone, a, ia,
358 $ j, desca, work, 1, 1, descw, one, a, ia, ja,
359 $ desca )
360*
361* Copy last superdiagonal element back into sub( A )
362*
363 jx = min( indxg2l( j, nb, 0, iacol, npcol ), nq )
364 CALL pcelset( a, i-1, j, desca, cmplx( e( jx ) ) )
365*
366 descw( csrc_ ) = mod( descw( csrc_ ) + npcol - 1, npcol )
367*
368 10 CONTINUE
369*
370* Use unblocked code to reduce the last or only block
371*
372 CALL pchetd2( uplo, min( n, nb ), a, ia, ja, desca, d, e,
373 $ tau, work, lwork, iinfo )
374*
375 ELSE
376*
377* Reduce the lower triangle of sub( A )
378*
379 kk = mod( ja+n-1, nb )
380 IF( kk.EQ.0 )
381 $ kk = nb
382 CALL descset( descw, n, nb, nb, nb, iarow, iacol, ictxt,
383 $ max( 1, np ) )
384*
385 DO 20 k = 1, n-nb, nb
386 i = ia + k - 1
387 j = ja + k - 1
388*
389* Reduce columns I:I+NB-1 to tridiagonal form and form
390* the matrix W which is needed to update the unreduced part
391* of the matrix
392*
393 CALL pclatrd( uplo, n-k+1, nb, a, i, j, desca, d, e, tau,
394 $ work, k, 1, descw, work( ipw ) )
395*
396* Update the unreduced submatrix A(I+NB:IA+N-1,I+NB:IA+N-1),
397* using an update of the form: A(I+NB:IA+N-1,I+NB:IA+N-1) :=
398* A(I+NB:IA+N-1,I+NB:IA+N-1) - V*W' - W*V'
399*
400 CALL pcher2k( uplo, 'No transpose', n-k-nb+1, nb, -cone, a,
401 $ i+nb, j, desca, work, k+nb, 1, descw, one, a,
402 $ i+nb, j+nb, desca )
403*
404* Copy last subdiagonal element back into sub( A )
405*
406 jx = min( indxg2l( j+nb-1, nb, 0, iacol, npcol ), nq )
407 CALL pcelset( a, i+nb, j+nb-1, desca, cmplx( e( jx ) ) )
408*
409 descw( csrc_ ) = mod( descw( csrc_ ) + 1, npcol )
410*
411 20 CONTINUE
412*
413* Use unblocked code to reduce the last or only block
414*
415 CALL pchetd2( uplo, kk, a, ia+k-1, ja+k-1, desca, d, e,
416 $ tau, work, lwork, iinfo )
417 END IF
418*
419 CALL pb_topset( ictxt, 'Combine', 'Columnwise', colctop )
420 CALL pb_topset( ictxt, 'Combine', 'Rowwise', rowctop )
421*
422 work( 1 ) = cmplx( real( lwmin ) )
423*
424 RETURN
425*
426* End of PCHETRD
427*
428 END
float cmplx[2]
Definition pblas.h:136
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine pcelset(a, ia, ja, desca, alpha)
Definition pcelset.f:2
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchetd2(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pchetd2.f:3
subroutine pchetrd(uplo, n, a, ia, ja, desca, d, e, tau, work, lwork, info)
Definition pchetrd.f:3
subroutine pchk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, nextra, ex, expos, info)
Definition pchkxmat.f:3
subroutine pclatrd(uplo, n, nb, a, ia, ja, desca, d, e, tau, w, iw, jw, descw, work)
Definition pclatrd.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2