SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzherk_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pzherk_( F_CHAR_T UPLO, F_CHAR_T TRANS, Int * N, Int * K,
21 double * ALPHA,
22 double * A, Int * IA, Int * JA, Int * DESCA,
23 double * BETA,
24 double * C, Int * IC, Int * JC, Int * DESCC )
25#else
26void pzherk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA,
27 C, IC, JC, DESCC )
28/*
29* .. Scalar Arguments ..
30*/
31 F_CHAR_T TRANS, UPLO;
32 Int * IA, * IC, * JA, * JC, * K, * N;
33 double * ALPHA, * BETA;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCC;
38 double * A, * C;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PZHERK performs one of the Hermitian rank k operations
46*
47* sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ),
48*
49* or
50*
51* sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ),
52*
53* where
54*
55* sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and,
56*
57* sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N',
58* A(IA:IA+K-1,JA:JA+N-1) otherwise.
59*
60* Alpha and beta are real scalars, sub( C ) is an n by n Hermitian
61* submatrix and sub( A ) is an n by k submatrix in the first case and a
62* k by n submatrix in the second case.
63*
64* Notes
65* =====
66*
67* A description vector is associated with each 2D block-cyclicly dis-
68* tributed matrix. This vector stores the information required to
69* establish the mapping between a matrix entry and its corresponding
70* process and memory location.
71*
72* In the following comments, the character _ should be read as
73* "of the distributed matrix". Let A be a generic term for any 2D
74* block cyclicly distributed matrix. Its description vector is DESC_A:
75*
76* NOTATION STORED IN EXPLANATION
77* ---------------- --------------- ------------------------------------
78* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
79* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
80* the NPROW x NPCOL BLACS process grid
81* A is distributed over. The context
82* itself is global, but the handle
83* (the integer value) may vary.
84* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
85* ted matrix A, M_A >= 0.
86* N_A (global) DESCA[ N_ ] The number of columns in the distri-
87* buted matrix A, N_A >= 0.
88* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
89* block of the matrix A, IMB_A > 0.
90* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
91* left block of the matrix A,
92* INB_A > 0.
93* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
94* bute the last M_A-IMB_A rows of A,
95* MB_A > 0.
96* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
97* bute the last N_A-INB_A columns of
98* A, NB_A > 0.
99* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
100* row of the matrix A is distributed,
101* NPROW > RSRC_A >= 0.
102* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
103* first column of A is distributed.
104* NPCOL > CSRC_A >= 0.
105* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
106* array storing the local blocks of
107* the distributed matrix A,
108* IF( Lc( 1, N_A ) > 0 )
109* LLD_A >= MAX( 1, Lr( 1, M_A ) )
110* ELSE
111* LLD_A >= 1.
112*
113* Let K be the number of rows of a matrix A starting at the global in-
114* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
115* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
116* receive if these K rows were distributed over NPROW processes. If K
117* is the number of columns of a matrix A starting at the global index
118* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
119* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
120* these K columns were distributed over NPCOL processes.
121*
122* The values of Lr() and Lc() may be determined via a call to the func-
123* tion PB_Cnumroc:
124* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
125* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
126*
127* Arguments
128* =========
129*
130* UPLO (global input) CHARACTER*1
131* On entry, UPLO specifies whether the local pieces of
132* the array C containing the upper or lower triangular part
133* of the Hermitian submatrix sub( C ) are to be referenced as
134* follows:
135*
136* UPLO = 'U' or 'u' Only the local pieces corresponding to
137* the upper triangular part of the
138* Hermitian submatrix sub( C ) are to be
139* referenced,
140*
141* UPLO = 'L' or 'l' Only the local pieces corresponding to
142* the lower triangular part of the
143* Hermitian submatrix sub( C ) are to be
144* referenced.
145*
146* TRANS (global input) CHARACTER*1
147* On entry, TRANS specifies the operation to be performed as
148* follows:
149*
150* TRANS = 'N' or 'n'
151* sub( C ) := alpha*sub( A )*conjg( sub( A )' ) +
152* beta*sub( C ),
153*
154* TRANS = 'C' or 'c'
155* sub( C ) := alpha*conjg( sub( A )' )*sub( A ) +
156* beta*sub( C ).
157*
158* N (global input) INTEGER
159* On entry, N specifies the order of the submatrix sub( C ).
160* N must be at least zero.
161*
162* K (global input) INTEGER
163* On entry, with TRANS = 'N' or 'n', K specifies the number of
164* columns of the submatrix sub( A ), and with TRANS = 'C' or
165* 'c', K specifies the number of rows of the submatrix
166* sub( A ). K must be at least zero.
167*
168* ALPHA (global input) DOUBLE PRECISION
169* On entry, ALPHA specifies the scalar alpha. When ALPHA is
170* supplied as zero then the local entries of the array A
171* corresponding to the entries of the submatrix sub( A ) need
172* not be set on input.
173*
174* A (local input) COMPLEX*16 array
175* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
176* at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at
177* least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
178* contains the local entries of the matrix A.
179* Before entry with TRANS = 'N' or 'n', this array contains the
180* local entries corresponding to the entries of the n by k sub-
181* matrix sub( A ), otherwise the local entries corresponding to
182* the entries of the k by n submatrix sub( A ).
183*
184* IA (global input) INTEGER
185* On entry, IA specifies A's global row index, which points to
186* the beginning of the submatrix sub( A ).
187*
188* JA (global input) INTEGER
189* On entry, JA specifies A's global column index, which points
190* to the beginning of the submatrix sub( A ).
191*
192* DESCA (global and local input) INTEGER array
193* On entry, DESCA is an integer array of dimension DLEN_. This
194* is the array descriptor for the matrix A.
195*
196* BETA (global input) DOUBLE PRECISION
197* On entry, BETA specifies the scalar beta. When BETA is
198* supplied as zero then the local entries of the array C
199* corresponding to the entries of the submatrix sub( C ) need
200* not be set on input.
201*
202* C (local input/local output) COMPLEX*16 array
203* On entry, C is an array of dimension (LLD_C, Kc), where Kc is
204* at least Lc( 1, JC+N-1 ). Before entry, this array contains
205* the local entries of the matrix C.
206* Before entry with UPLO = 'U' or 'u', this array contains
207* the local entries corresponding to the upper triangular part
208* of the Hermitian submatrix sub( C ), and the local entries
209* corresponding to the strictly lower triangular of sub( C )
210* are not referenced. On exit, the upper triangular part of
211* sub( C ) is overwritten by the upper triangular part of the
212* updated submatrix.
213* Before entry with UPLO = 'L' or 'l', this array contains
214* the local entries corresponding to the lower triangular part
215* of the Hermitian submatrix sub( C ), and the local entries
216* corresponding to the strictly upper triangular of sub( C )
217* are not referenced. On exit, the lower triangular part of
218* sub( C ) is overwritten by the lower triangular part of the
219* updated submatrix.
220* Note that the imaginary parts of the local entries corres-
221* ponding to the diagonal elements of sub( C ) need not be
222* set, they are assumed to be zero, and on exit they are set
223* to zero.
224*
225* IC (global input) INTEGER
226* On entry, IC specifies C's global row index, which points to
227* the beginning of the submatrix sub( C ).
228*
229* JC (global input) INTEGER
230* On entry, JC specifies C's global column index, which points
231* to the beginning of the submatrix sub( C ).
232*
233* DESCC (global and local input) INTEGER array
234* On entry, DESCC is an integer array of dimension DLEN_. This
235* is the array descriptor for the matrix C.
236*
237* -- Written on April 1, 1998 by
238* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
239*
240* ---------------------------------------------------------------------
241*/
242/*
243* .. Local Scalars ..
244*/
245 char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave,
246 rtop, rtopsave;
247 Int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol,
248 myrow, nb, notran, npcol, nprow, upper;
249 double Aest, ACest, tmp1, tmp2, tmp3, tmp4;
250 cmplx16 Calph;
251 PBTYP_T * type;
252/*
253* .. Local Arrays ..
254*/
255 Int Ad[DLEN_], Cd[DLEN_];
256/* ..
257* .. Executable Statements ..
258*
259*/
260 upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
261 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
262 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
263 PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
264#ifndef NO_ARGCHK
265/*
266* Test the input parameters
267*/
268 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
269 if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) )
270 {
271 if( ( !upper ) && ( UploC != CLOWER ) )
272 {
273 PB_Cwarn( ctxt, __LINE__, "PZHERK", "Illegal UPLO = %c\n", UploC );
274 info = -1;
275 }
276 else if( ( !notran ) && ( TranOp != CCOTRAN ) )
277 {
278 PB_Cwarn( ctxt, __LINE__, "PZHERK", "Illegal TRANS = %c\n", TranOp );
279 info = -2;
280 }
281 if( notran )
282 PB_Cchkmat( ctxt, "PZHERK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9,
283 &info );
284 else
285 PB_Cchkmat( ctxt, "PZHERK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9,
286 &info );
287 PB_Cchkmat( ctxt, "PZHERK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14,
288 &info );
289 }
290 if( info ) { PB_Cabort( ctxt, "PZHERK", info ); return; }
291#endif
292/*
293* Quick return if possible
294*/
295 if( ( *N == 0 ) ||
296 ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) &&
297 ( BETA[REAL_PART] == ONE ) ) )
298 return;
299/*
300* Get type structure
301*/
302 type = PB_Cztypeset();
303/*
304* And when alpha or K is zero
305*/
306 if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) )
307 {
308 if( BETA[REAL_PART] == ZERO )
309 {
310 PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero,
311 ((char *) C), Ci, Cj, Cd );
312 }
313 else
314 {
315 PB_Cplascal( type, &UploC, CONJG, *N, *N, ((char *) BETA),
316 ((char *) C), Ci, Cj, Cd );
317 }
318 return;
319 }
320/*
321* Start the operations
322*/
323#ifdef NO_ARGCHK
324 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
325#endif
326 Calph[REAL_PART] = ALPHA[REAL_PART];
327 Calph[IMAG_PART] = ZERO;
328/*
329* Algorithm selection is based on approximation of the communication volume
330* for distributed and aligned operands.
331*
332* ACest: both operands sub( A ) and sub( C ) are communicated (K >> N)
333* Aest : only sub( A ) is communicated (N >> K)
334*/
335 if( notran )
336 {
337 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol );
338 ACest = (double)(*N) *
339 ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) +
340 ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO :
341 CBRATIO * tmp1 / TWO ) );
342 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
343 tmp4 = DNROC( *N, Ad[MB_], nprow );
344 Aest = (double)(*K) *
345 ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) +
346 ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) );
347 }
348 else
349 {
350 tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow );
351 ACest = (double)(*N) *
352 ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) +
353 ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO :
354 CBRATIO * tmp2 / TWO ) );
355 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
356 tmp3 = DNROC( *N, Ad[NB_], npcol );
357 Aest = (double)(*K) *
358 ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) +
359 ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) );
360 }
361/*
362* Shift a little the cross-over point between both algorithms.
363*/
364 ChooseAC = ( ( 1.3 * ACest ) <= Aest );
365/*
366* BLACS topologies are enforced iff N and K are strictly greater than the
367* logical block size returned by pilaenv_. Otherwise, it is assumed that the
368* routine calling this routine has already selected an adequate topology.
369*/
370 nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) );
371 ForceTop = ( ( *N > nb ) && ( *K > nb ) );
372
373 if( ChooseAC )
374 {
375 if( notran )
376 {
377 OpC = CBCAST;
378 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
379
380 if( ForceTop )
381 {
382 OpR = CCOMBINE;
383 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
384
385 rtopsave = rtop;
386 ctopsave = ctop;
387
388 if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
389 else { TopR = CTOP_DRING; TopC = CTOP_IRING; }
390
391 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
392 rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR );
393/*
394* Remove the next line when the BLACS combine operations support ring
395* topologies
396*/
397 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT );
398 }
399
400 DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
401 }
402 else
403 {
404 OpR = CBCAST;
405 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
406
407 if( ForceTop )
408 {
409 OpC = CCOMBINE;
410 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
411
412 rtopsave = rtop;
413 ctopsave = ctop;
414
415 if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
416 else { TopR = CTOP_DRING; TopC = CTOP_IRING; }
417
418 rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR );
419 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
420/*
421* Remove the next line when the BLACS combine operations support ring
422* topologies
423*/
424 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
425 }
426 DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
427 }
428
429 PB_CpsyrkAC( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ),
430 *N, *K, ((char *)Calph), ((char *)A), Ai, Aj, Ad,
431 ((char *)BETA), ((char *)C), Ci, Cj, Cd );
432 }
433 else
434 {
435 if( notran )
436 {
437 OpR = CBCAST;
438 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
439
440 if( ForceTop )
441 {
442 OpC = CBCAST;
443 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
444
445 rtopsave = rtop;
446 ctopsave = ctop;
447/*
448* No clear winner for the ring topologies, so that if a ring topology is
449* already selected, keep it.
450*/
451 if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
452 ( rtop != CTOP_SRING ) )
453 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING );
454 if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
455 ( ctop != CTOP_SRING ) )
456 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
457 }
458
459 DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
460 }
461 else
462 {
463 OpC = CBCAST;
464 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
465
466 if( ForceTop )
467 {
468 OpR = CBCAST;
469 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
470
471 rtopsave = rtop;
472 ctopsave = ctop;
473/*
474* No clear winner for the ring topologies, so that if a ring topology is
475* already selected, keep it.
476*/
477 if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
478 ( rtop != CTOP_SRING ) )
479 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING );
480 if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
481 ( ctop != CTOP_SRING ) )
482 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
483 }
484
485 DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
486 }
487
488 PB_CpsyrkA( type, &DirA, CONJG, &UploC, ( notran ? NOTRAN : COTRAN ),
489 *N, *K, ((char *)Calph), ((char *)A), Ai, Aj, Ad,
490 ((char *)BETA), ((char *)C), Ci, Cj, Cd );
491 }
492/*
493* Restore the BLACS topologies when necessary.
494*/
495 if( ForceTop )
496 {
497 rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave );
498 ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave );
499 }
500/*
501* End of PZHERK
502*/
503}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
double cmplx16[2]
Definition pblas.h:137
#define F2C_CHAR(a)
Definition pblas.h:124
#define CBRATIO
Definition pblas.h:37
#define C2F_CHAR(a)
Definition pblas.h:125
#define IMAG_PART
Definition pblas.h:140
char * F_CHAR_T
Definition pblas.h:122
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CTOP_SRING
Definition PBblacs.h:29
#define TOP_DEFAULT
Definition PBblacs.h:51
#define CCOMBINE
Definition PBblacs.h:24
#define ROW
Definition PBblacs.h:46
#define CBCAST
Definition PBblacs.h:23
#define TOP_SRING
Definition PBblacs.h:54
void Cblacs_gridinfo()
#define CTOP_IRING
Definition PBblacs.h:27
#define CTOP_DRING
Definition PBblacs.h:28
#define NOTRAN
Definition PBblas.h:44
#define CONJG
Definition PBblas.h:47
#define CBACKWARD
Definition PBblas.h:39
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define COTRAN
Definition PBblas.h:48
#define CCOTRAN
Definition PBblas.h:22
#define CFORWARD
Definition PBblas.h:38
#define CLOWER
Definition PBblas.h:25
#define pzherk_
Definition PBpblas.h:178
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
void PB_CpsyrkAC()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_CpsyrkA()
void PB_Cchkmat()
void PB_Cwarn()
char * PB_Ctop()
PBTYP_T * PB_Cztypeset()
void PB_Cplapad()
void PB_Cplascal()
#define RSRC_
Definition PBtools.h:45
#define TWO
Definition PBtools.h:65
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
#define ZERO
Definition PBtools.h:66
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define DNROC(n_, nb_, p_)
Definition PBtools.h:111
char type
Definition pblas.h:331
char * zero
Definition pblas.h:335