SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pzher2_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pzher2_( F_CHAR_T UPLO, Int * N, double * ALPHA,
21 double * X, Int * IX, Int * JX, Int * DESCX, Int * INCX,
22 double * Y, Int * IY, Int * JY, Int * DESCY, Int * INCY,
23 double * A, Int * IA, Int * JA, Int * DESCA )
24#else
25void pzher2_( UPLO, N, ALPHA, X, IX, JX, DESCX, INCX, Y, IY, JY,
26 DESCY, INCY, A, IA, JA, DESCA )
27/*
28* .. Scalar Arguments ..
29*/
30 F_CHAR_T UPLO;
31 Int * IA, * INCX, * INCY, * IX, * IY, * JA, * JX, * JY,
32 * N;
33 double * ALPHA;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCX, * DESCY;
38 double * A, * X, * Y;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PZHER2 performs the Hermitian rank 2 operation
46*
47* sub( A ) := alpha*sub( X )*conjg( sub( Y )' ) +
48* conjg( alpha )*sub( Y )*conjg( sub( X )' ) + sub( A ),
49*
50* where
51*
52* sub( A ) denotes A(IA:IA+N-1,JA:JA+N-1),
53*
54* sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
55* X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
56*
57* sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
58* Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
59*
60* Alpha is a scalar, sub( X ) and sub( Y ) are n element subvectors and
61* sub( A ) is an n by n Hermitian submatrix.
62*
63* Notes
64* =====
65*
66* A description vector is associated with each 2D block-cyclicly dis-
67* tributed matrix. This vector stores the information required to
68* establish the mapping between a matrix entry and its corresponding
69* process and memory location.
70*
71* In the following comments, the character _ should be read as
72* "of the distributed matrix". Let A be a generic term for any 2D
73* block cyclicly distributed matrix. Its description vector is DESC_A:
74*
75* NOTATION STORED IN EXPLANATION
76* ---------------- --------------- ------------------------------------
77* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
78* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
79* the NPROW x NPCOL BLACS process grid
80* A is distributed over. The context
81* itself is global, but the handle
82* (the integer value) may vary.
83* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
84* ted matrix A, M_A >= 0.
85* N_A (global) DESCA[ N_ ] The number of columns in the distri-
86* buted matrix A, N_A >= 0.
87* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
88* block of the matrix A, IMB_A > 0.
89* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
90* left block of the matrix A,
91* INB_A > 0.
92* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
93* bute the last M_A-IMB_A rows of A,
94* MB_A > 0.
95* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
96* bute the last N_A-INB_A columns of
97* A, NB_A > 0.
98* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
99* row of the matrix A is distributed,
100* NPROW > RSRC_A >= 0.
101* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
102* first column of A is distributed.
103* NPCOL > CSRC_A >= 0.
104* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
105* array storing the local blocks of
106* the distributed matrix A,
107* IF( Lc( 1, N_A ) > 0 )
108* LLD_A >= MAX( 1, Lr( 1, M_A ) )
109* ELSE
110* LLD_A >= 1.
111*
112* Let K be the number of rows of a matrix A starting at the global in-
113* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
114* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
115* receive if these K rows were distributed over NPROW processes. If K
116* is the number of columns of a matrix A starting at the global index
117* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
118* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
119* these K columns were distributed over NPCOL processes.
120*
121* The values of Lr() and Lc() may be determined via a call to the func-
122* tion PB_Cnumroc:
123* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
124* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
125*
126* Arguments
127* =========
128*
129* UPLO (global input) CHARACTER*1
130* On entry, UPLO specifies whether the local pieces of
131* the array A containing the upper or lower triangular part
132* of the Hermitian submatrix sub( A ) are to be referenced as
133* follows:
134*
135* UPLO = 'U' or 'u' Only the local pieces corresponding to
136* the upper triangular part of the
137* Hermitian submatrix sub( A ) are to be
138* referenced,
139*
140* UPLO = 'L' or 'l' Only the local pieces corresponding to
141* the lower triangular part of the
142* Hermitian submatrix sub( A ) are to be
143* referenced.
144*
145* N (global input) INTEGER
146* On entry, N specifies the order of the submatrix sub( A ).
147* N must be at least zero.
148*
149* ALPHA (global input) COMPLEX*16
150* On entry, ALPHA specifies the scalar alpha. When ALPHA is
151* supplied as zero then the local entries of the arrays X
152* and Y corresponding to the entries of the subvectors sub( X )
153* and sub( Y ) respectively need not be set on input.
154*
155* X (local input) COMPLEX*16 array
156* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
157* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
158* MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
159* Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
160* Before entry, this array contains the local entries of the
161* matrix X.
162*
163* IX (global input) INTEGER
164* On entry, IX specifies X's global row index, which points to
165* the beginning of the submatrix sub( X ).
166*
167* JX (global input) INTEGER
168* On entry, JX specifies X's global column index, which points
169* to the beginning of the submatrix sub( X ).
170*
171* DESCX (global and local input) INTEGER array
172* On entry, DESCX is an integer array of dimension DLEN_. This
173* is the array descriptor for the matrix X.
174*
175* INCX (global input) INTEGER
176* On entry, INCX specifies the global increment for the
177* elements of X. Only two values of INCX are supported in
178* this version, namely 1 and M_X. INCX must not be zero.
179*
180* Y (local input) COMPLEX*16 array
181* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
182* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
183* MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
184* Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
185* Before entry, this array contains the local entries of the
186* matrix Y.
187*
188* IY (global input) INTEGER
189* On entry, IY specifies Y's global row index, which points to
190* the beginning of the submatrix sub( Y ).
191*
192* JY (global input) INTEGER
193* On entry, JY specifies Y's global column index, which points
194* to the beginning of the submatrix sub( Y ).
195*
196* DESCY (global and local input) INTEGER array
197* On entry, DESCY is an integer array of dimension DLEN_. This
198* is the array descriptor for the matrix Y.
199*
200* INCY (global input) INTEGER
201* On entry, INCY specifies the global increment for the
202* elements of Y. Only two values of INCY are supported in
203* this version, namely 1 and M_Y. INCY must not be zero.
204*
205* A (local input/local output) COMPLEX*16 array
206* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
207* at least Lc( 1, JA+N-1 ). Before entry, this array contains
208* the local entries of the matrix A.
209* Before entry with UPLO = 'U' or 'u', this array contains
210* the local entries corresponding to the upper triangular part
211* of the Hermitian submatrix sub( A ), and the local entries
212* corresponding to the strictly lower triangular of sub( A )
213* are not referenced. On exit, the upper triangular part of
214* sub( A ) is overwritten by the upper triangular part of the
215* updated submatrix.
216* Before entry with UPLO = 'L' or 'l', this array contains
217* the local entries corresponding to the lower triangular part
218* of the Hermitian submatrix sub( A ), and the local entries
219* corresponding to the strictly upper triangular of sub( A )
220* are not referenced. On exit, the lower triangular part of
221* sub( A ) is overwritten by the lower triangular part of the
222* updated submatrix.
223* Note that the imaginary parts of the local entries corres-
224* ponding to the diagonal elements of sub( A ) need not be
225* set, they are assumed to be zero, and on exit they are set
226* to zero.
227*
228* IA (global input) INTEGER
229* On entry, IA specifies A's global row index, which points to
230* the beginning of the submatrix sub( A ).
231*
232* JA (global input) INTEGER
233* On entry, JA specifies A's global column index, which points
234* to the beginning of the submatrix sub( A ).
235*
236* DESCA (global and local input) INTEGER array
237* On entry, DESCA is an integer array of dimension DLEN_. This
238* is the array descriptor for the matrix A.
239*
240* -- Written on April 1, 1998 by
241* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
242*
243* ---------------------------------------------------------------------
244*/
245/*
246* .. Local Scalars ..
247*/
248 char UploA;
249 Int Acol, Ai, Aii, Aimb1, Ainb1, Aj, Ajj, Akp, Akq, Ald, Amb,
250 Amp, Amp0, Anb, Anq, Anq0, Arow, XCfr, XCld, XRfr, XRld,
251 Xi, Xj, YCfr, YCld, YRfr, YRld, Yi, Yj, ctxt, info, ione=1,
252 k, kb, ktmp, mycol, myrow, nb, npcol, nprow, size, upper;
253 cmplx16 Calpha;
254 PBTYP_T * type;
255/*
256* .. Local Arrays ..
257*/
258 Int Ad [DLEN_], Ad0 [DLEN_], XCd0[DLEN_], XRd0[DLEN_], Xd[DLEN_],
259 YCd0[DLEN_], YRd0[DLEN_], Yd [DLEN_];
260 char * Aptr = NULL, * XC = NULL, * XR = NULL, * YC = NULL,
261 * YR = NULL;
262/* ..
263* .. Executable Statements ..
264*
265*/
266 upper = ( ( UploA = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
267 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
268 PB_CargFtoC( *IX, *JX, DESCX, &Xi, &Xj, Xd );
269 PB_CargFtoC( *IY, *JY, DESCY, &Yi, &Yj, Yd );
270#ifndef NO_ARGCHK
271/*
272* Test the input parameters
273*/
274 Cblacs_gridinfo( ( ctxt = Xd[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
275 if( !( info = ( ( nprow == -1 ) ? -( 701 + CTXT_ ) : 0 ) ) )
276 {
277 if( ( !upper ) && ( UploA != CLOWER ) )
278 {
279 PB_Cwarn( ctxt, __LINE__, "PZHER2", "Illegal UPLO = %c\n", UploA );
280 info = -1;
281 }
282 PB_Cchkvec( ctxt, "PZHER2", "X", *N, 2, Xi, Xj, Xd, *INCX, 7, &info );
283 PB_Cchkvec( ctxt, "PZHER2", "Y", *N, 2, Yi, Yj, Yd, *INCY, 12, &info );
284 PB_Cchkmat( ctxt, "PZHER2", "A", *N, 2, *N, 2, Ai, Aj, Ad, 17, &info );
285 }
286 if( info ) { PB_Cabort( ctxt, "PZHER2", info ); return; }
287#endif
288/*
289* Quick return if possible
290*/
291 if( ( *N == 0 ) ||
292 ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) )
293 return;
294/*
295* Retrieve process grid information
296*/
297#ifdef NO_ARGCHK
298 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
299#endif
300/*
301* Get type structure
302*/
303 type = PB_Cztypeset();
304/*
305* Compute descriptor Ad0 for sub( A )
306*/
307 PB_Cdescribe( *N, *N, Ai, Aj, Ad, nprow, npcol, myrow, mycol, &Aii, &Ajj,
308 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
309/*
310* Replicate sub( X ) in process rows (XR) and process columns (XC) spanned by
311* sub( A )
312*/
313 if( *INCX == Xd[M_] )
314 {
315 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
316 Xd, ROW, &XR, XRd0, &XRfr );
317 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, XR, 0, 0,
318 XRd0, ROW, &XC, XCd0, &XCfr );
319 }
320 else
321 {
322 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) X), Xi, Xj,
323 Xd, COLUMN, &XC, XCd0, &XCfr );
324 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, XC, 0, 0,
325 XCd0, COLUMN, &XR, XRd0, &XRfr );
326 }
327/*
328* Replicate sub( Y ) in process rows (YR) and process columns (YC) spanned by
329* sub( A )
330*/
331 if( *INCY == Yd[M_] )
332 {
333 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
334 Yd, ROW, &YR, YRd0, &YRfr );
335 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, YR, 0, 0,
336 YRd0, ROW, &YC, YCd0, &YCfr );
337 }
338 else
339 {
340 PB_CInV( type, NOCONJG, COLUMN, *N, *N, Ad0, 1, ((char *) Y), Yi, Yj,
341 Yd, COLUMN, &YC, YCd0, &YCfr );
342 PB_CInV( type, NOCONJG, ROW, *N, *N, Ad0, 1, YC, 0, 0,
343 YCd0, COLUMN, &YR, YRd0, &YRfr );
344 }
345/*
346* Local rank-2 update if I own some data
347*/
348 Amp = PB_Cnumroc( *N, 0, Aimb1, Amb, myrow, Arow, nprow );
349 Anq = PB_Cnumroc( *N, 0, Ainb1, Anb, mycol, Acol, npcol );
350
351 if( ( Amp > 0 ) && ( Anq > 0 ) )
352 {
353 size = type->size;
354 Aptr = Mptr( ((char *) A), Aii, Ajj, Ald, size );
355
356 XCld = XCd0[LLD_]; YCld = YCd0[LLD_];
357 XRld = XRd0[LLD_]; YRld = YRd0[LLD_];
358 Calpha[REAL_PART] = ALPHA[REAL_PART];
359 Calpha[IMAG_PART] = -ALPHA[IMAG_PART];
360/*
361* Computational partitioning size is computed as the product of the logical
362* value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
363*/
364 nb = 2 * pilaenv_( &ctxt, C2F_CHAR( &type->type ) ) *
365 PB_Clcm( ( Arow >= 0 ? nprow : 1 ), ( Acol >= 0 ? npcol : 1 ) );
366 if( upper )
367 {
368 for( k = 0; k < *N; k += nb )
369 {
370 kb = *N - k; kb = MIN( kb, nb );
371 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
372 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
373 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
374 if( Akp > 0 && Anq0 > 0 )
375 {
376 zgerc_( &Akp, &Anq0, ((char *) ALPHA), XC, &ione,
377 Mptr( YR, 0, Akq, YRld, size ), &YRld,
378 Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
379 zgerc_( &Akp, &Anq0, ((char *) Calpha), YC, &ione,
380 Mptr( XR, 0, Akq, XRld, size ), &XRld,
381 Mptr( Aptr, 0, Akq, Ald, size ), &Ald );
382 }
383 PB_Cpsyr2( type, UPPER, kb, 1, ((char *) ALPHA),
384 Mptr( XC, Akp, 0, XCld, size ), XCld,
385 Mptr( XR, 0, Akq, XRld, size ), XRld,
386 Mptr( YC, Akp, 0, YCld, size ), YCld,
387 Mptr( YR, 0, Akq, YRld, size ), YRld,
388 Aptr, k, k, Ad0, PB_Ctzher2 );
389 }
390 }
391 else
392 {
393 for( k = 0; k < *N; k += nb )
394 {
395 kb = *N - k; ktmp = k + ( kb = MIN( kb, nb ) );
396 Akp = PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
397 Akq = PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
398 PB_Cpsyr2( type, LOWER, kb, 1, ((char *) ALPHA),
399 Mptr( XC, Akp, 0, XCld, size ), XCld,
400 Mptr( XR, 0, Akq, XRld, size ), XRld,
401 Mptr( YC, Akp, 0, YCld, size ), YCld,
402 Mptr( YR, 0, Akq, YRld, size ), YRld,
403 Aptr, k, k, Ad0, PB_Ctzher2 );
404 Akp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
405 Amp0 = Amp - Akp;
406 Anq0 = PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
407 if( Amp0 > 0 && Anq0 > 0 )
408 {
409 zgerc_( &Amp0, &Anq0, ((char *) ALPHA),
410 Mptr( XC, Akp, 0, XCld, size ), &ione,
411 Mptr( YR, 0, Akq, YRld, size ), &YRld,
412 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
413 zgerc_( &Amp0, &Anq0, ((char *) Calpha),
414 Mptr( YC, Akp, 0, YCld, size ), &ione,
415 Mptr( XR, 0, Akq, XRld, size ), &XRld,
416 Mptr( Aptr, Akp, Akq, Ald, size ), &Ald );
417 }
418 }
419 }
420 }
421 if( XRfr ) free( XR );
422 if( XCfr ) free( XC );
423 if( YRfr ) free( YR );
424 if( YCfr ) free( YC );
425/*
426* End of PZHER2
427*/
428}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
double cmplx16[2]
Definition pblas.h:137
#define F2C_CHAR(a)
Definition pblas.h:124
#define C2F_CHAR(a)
Definition pblas.h:125
#define IMAG_PART
Definition pblas.h:140
char * F_CHAR_T
Definition pblas.h:122
#define COLUMN
Definition PBblacs.h:45
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define zgerc_
Definition PBblas.h:164
#define LOWER
Definition PBblas.h:51
#define UPPER
Definition PBblas.h:52
#define CLOWER
Definition PBblas.h:25
#define pzher2_
Definition PBpblas.h:144
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
void PB_Cabort()
void PB_Cchkvec()
void PB_Ctzher2()
void PB_Cchkmat()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
void PB_Cwarn()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
void PB_CInV()
PBTYP_T * PB_Cztypeset()
#define M_
Definition PBtools.h:39
void PB_CargFtoC()
Int PB_Clcm()
#define ZERO
Definition PBtools.h:66
void PB_Cpsyr2()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
void PB_Cdescribe()
char type
Definition pblas.h:331
Int size
Definition pblas.h:333