SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzsyrk_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pzsyrk_( F_CHAR_T UPLO, F_CHAR_T TRANS, Int * N, Int * K,
21 double * ALPHA,
22 double * A, Int * IA, Int * JA, Int * DESCA,
23 double * BETA,
24 double * C, Int * IC, Int * JC, Int * DESCC )
25#else
26void pzsyrk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA,
27 C, IC, JC, DESCC )
28/*
29* .. Scalar Arguments ..
30*/
31 F_CHAR_T TRANS, UPLO;
32 Int * IA, * IC, * JA, * JC, * K, * N;
33 double * ALPHA, * BETA;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCC;
38 double * A, * C;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PZSYRK performs one of the symmetric rank k operations
46*
47* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
48*
49* or
50*
51* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
52*
53* where
54*
55* sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and,
56*
57* sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N',
58* A(IA:IA+K-1,JA:JA+N-1) otherwise.
59*
60* Alpha and beta are scalars, sub( C ) is an n by n symmetric
61* submatrix and sub( A ) is an n by k submatrix in the first case and a
62* k by n submatrix in the second case.
63*
64* Notes
65* =====
66*
67* A description vector is associated with each 2D block-cyclicly dis-
68* tributed matrix. This vector stores the information required to
69* establish the mapping between a matrix entry and its corresponding
70* process and memory location.
71*
72* In the following comments, the character _ should be read as
73* "of the distributed matrix". Let A be a generic term for any 2D
74* block cyclicly distributed matrix. Its description vector is DESC_A:
75*
76* NOTATION STORED IN EXPLANATION
77* ---------------- --------------- ------------------------------------
78* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
79* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
80* the NPROW x NPCOL BLACS process grid
81* A is distributed over. The context
82* itself is global, but the handle
83* (the integer value) may vary.
84* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
85* ted matrix A, M_A >= 0.
86* N_A (global) DESCA[ N_ ] The number of columns in the distri-
87* buted matrix A, N_A >= 0.
88* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
89* block of the matrix A, IMB_A > 0.
90* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
91* left block of the matrix A,
92* INB_A > 0.
93* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
94* bute the last M_A-IMB_A rows of A,
95* MB_A > 0.
96* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
97* bute the last N_A-INB_A columns of
98* A, NB_A > 0.
99* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
100* row of the matrix A is distributed,
101* NPROW > RSRC_A >= 0.
102* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
103* first column of A is distributed.
104* NPCOL > CSRC_A >= 0.
105* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
106* array storing the local blocks of
107* the distributed matrix A,
108* IF( Lc( 1, N_A ) > 0 )
109* LLD_A >= MAX( 1, Lr( 1, M_A ) )
110* ELSE
111* LLD_A >= 1.
112*
113* Let K be the number of rows of a matrix A starting at the global in-
114* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
115* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
116* receive if these K rows were distributed over NPROW processes. If K
117* is the number of columns of a matrix A starting at the global index
118* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
119* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
120* these K columns were distributed over NPCOL processes.
121*
122* The values of Lr() and Lc() may be determined via a call to the func-
123* tion PB_Cnumroc:
124* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
125* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
126*
127* Arguments
128* =========
129*
130* UPLO (global input) CHARACTER*1
131* On entry, UPLO specifies whether the local pieces of
132* the array C containing the upper or lower triangular part
133* of the symmetric submatrix sub( C ) are to be referenced as
134* follows:
135*
136* UPLO = 'U' or 'u' Only the local pieces corresponding to
137* the upper triangular part of the
138* symmetric submatrix sub( C ) are to be
139* referenced,
140*
141* UPLO = 'L' or 'l' Only the local pieces corresponding to
142* the lower triangular part of the
143* symmetric submatrix sub( C ) are to be
144* referenced.
145*
146* TRANS (global input) CHARACTER*1
147* On entry, TRANS specifies the operation to be performed as
148* follows:
149*
150* TRANS = 'N' or 'n'
151* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
152*
153* TRANS = 'T' or 't'
154* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ).
155*
156* N (global input) INTEGER
157* On entry, N specifies the order of the submatrix sub( C ).
158* N must be at least zero.
159*
160* K (global input) INTEGER
161* On entry, with TRANS = 'N' or 'n', K specifies the number of
162* columns of the submatrix sub( A ), and with TRANS = 'T' or
163* 't', K specifies the number of rows of the submatrix
164* sub( A ). K must be at least zero.
165*
166* ALPHA (global input) COMPLEX*16
167* On entry, ALPHA specifies the scalar alpha. When ALPHA is
168* supplied as zero then the local entries of the array A
169* corresponding to the entries of the submatrix sub( A ) need
170* not be set on input.
171*
172* A (local input) COMPLEX*16 array
173* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
174* at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at
175* least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
176* contains the local entries of the matrix A.
177* Before entry with TRANS = 'N' or 'n', this array contains the
178* local entries corresponding to the entries of the n by k sub-
179* matrix sub( A ), otherwise the local entries corresponding to
180* the entries of the k by n submatrix sub( A ).
181*
182* IA (global input) INTEGER
183* On entry, IA specifies A's global row index, which points to
184* the beginning of the submatrix sub( A ).
185*
186* JA (global input) INTEGER
187* On entry, JA specifies A's global column index, which points
188* to the beginning of the submatrix sub( A ).
189*
190* DESCA (global and local input) INTEGER array
191* On entry, DESCA is an integer array of dimension DLEN_. This
192* is the array descriptor for the matrix A.
193*
194* BETA (global input) COMPLEX*16
195* On entry, BETA specifies the scalar beta. When BETA is
196* supplied as zero then the local entries of the array C
197* corresponding to the entries of the submatrix sub( C ) need
198* not be set on input.
199*
200* C (local input/local output) COMPLEX*16 array
201* On entry, C is an array of dimension (LLD_C, Kc), where Kc is
202* at least Lc( 1, JC+N-1 ). Before entry, this array contains
203* the local entries of the matrix C.
204* Before entry with UPLO = 'U' or 'u', this array contains
205* the local entries corresponding to the upper triangular part
206* of the symmetric submatrix sub( C ), and the local entries
207* corresponding to the strictly lower triangular of sub( C )
208* are not referenced. On exit, the upper triangular part of
209* sub( C ) is overwritten by the upper triangular part of the
210* updated submatrix.
211* Before entry with UPLO = 'L' or 'l', this array contains
212* the local entries corresponding to the lower triangular part
213* of the symmetric submatrix sub( C ), and the local entries
214* corresponding to the strictly upper triangular of sub( C )
215* are not referenced. On exit, the lower triangular part of
216* sub( C ) is overwritten by the lower triangular part of the
217* updated submatrix.
218*
219* IC (global input) INTEGER
220* On entry, IC specifies C's global row index, which points to
221* the beginning of the submatrix sub( C ).
222*
223* JC (global input) INTEGER
224* On entry, JC specifies C's global column index, which points
225* to the beginning of the submatrix sub( C ).
226*
227* DESCC (global and local input) INTEGER array
228* On entry, DESCC is an integer array of dimension DLEN_. This
229* is the array descriptor for the matrix C.
230*
231* -- Written on April 1, 1998 by
232* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
233*
234* ---------------------------------------------------------------------
235*/
236/*
237* .. Local Scalars ..
238*/
239 char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave,
240 rtop, rtopsave;
241 Int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol,
242 myrow, nb, notran, npcol, nprow, upper;
243 double Aest, ACest, tmp1, tmp2, tmp3, tmp4;
244 PBTYP_T * type;
245/*
246* .. Local Arrays ..
247*/
248 Int Ad[DLEN_], Cd[DLEN_];
249/* ..
250* .. Executable Statements ..
251*
252*/
253 upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
254 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
255 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
256 PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
257#ifndef NO_ARGCHK
258/*
259* Test the input parameters
260*/
261 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
262 if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) )
263 {
264 if( ( !upper ) && ( UploC != CLOWER ) )
265 {
266 PB_Cwarn( ctxt, __LINE__, "PZSYRK", "Illegal UPLO = %c\n", UploC );
267 info = -1;
268 }
269 else if( ( !notran ) && ( TranOp != CTRAN ) )
270 {
271 PB_Cwarn( ctxt, __LINE__, "PZSYRK", "Illegal TRANS = %c\n", TranOp );
272 info = -2;
273 }
274 if( notran )
275 PB_Cchkmat( ctxt, "PZSYRK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9,
276 &info );
277 else
278 PB_Cchkmat( ctxt, "PZSYRK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9,
279 &info );
280 PB_Cchkmat( ctxt, "PZSYRK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14,
281 &info );
282 }
283 if( info ) { PB_Cabort( ctxt, "PZSYRK", info ); return; }
284#endif
285/*
286* Quick return if possible
287*/
288 if( ( *N == 0 ) ||
289 ( ( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ||
290 ( *K == 0 ) ) &&
291 ( ( BETA[REAL_PART] == ONE ) && ( BETA[IMAG_PART] == ZERO ) ) ) )
292 return;
293/*
294* Get type structure
295*/
296 type = PB_Cztypeset();
297/*
298* And when alpha or K is zero
299*/
300 if( ( ( ALPHA[REAL_PART] == ZERO ) && ( ALPHA[IMAG_PART] == ZERO ) ) ||
301 ( *K == 0 ) )
302 {
303 if( ( BETA[REAL_PART] == ZERO ) && ( BETA[IMAG_PART] == ZERO ) )
304 {
305 PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero,
306 ((char *) C), Ci, Cj, Cd );
307 }
308 else
309 {
310 PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA),
311 ((char *) C), Ci, Cj, Cd );
312 }
313 return;
314 }
315/*
316* Start the operations
317*/
318#ifdef NO_ARGCHK
319 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
320#endif
321/*
322* Algorithm selection is based on approximation of the communication volume
323* for distributed and aligned operands.
324*
325* ACest: both operands sub( A ) and sub( C ) are communicated (K >> N)
326* Aest : only sub( A ) is communicated (N >> K)
327*/
328 if( notran )
329 {
330 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol );
331 ACest = (double)(*N) *
332 ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) +
333 ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO :
334 CBRATIO * tmp1 / TWO ) );
335 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
336 tmp4 = DNROC( *N, Ad[MB_], nprow );
337 Aest = (double)(*K) *
338 ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) +
339 ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) );
340 }
341 else
342 {
343 tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow );
344 ACest = (double)(*N) *
345 ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) +
346 ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO :
347 CBRATIO * tmp2 / TWO ) );
348 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
349 tmp3 = DNROC( *N, Ad[NB_], npcol );
350 Aest = (double)(*K) *
351 ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) +
352 ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) );
353 }
354/*
355* Shift a little the cross-over point between both algorithms.
356*/
357 ChooseAC = ( ( 1.3 * ACest ) <= Aest );
358/*
359* BLACS topologies are enforced iff N and K are strictly greater than the
360* logical block size returned by pilaenv_. Otherwise, it is assumed that the
361* routine calling this routine has already selected an adequate topology.
362*/
363 nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) );
364 ForceTop = ( ( *N > nb ) && ( *K > nb ) );
365
366 if( ChooseAC )
367 {
368 if( notran )
369 {
370 OpC = CBCAST;
371 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
372
373 if( ForceTop )
374 {
375 OpR = CCOMBINE;
376 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
377
378 rtopsave = rtop;
379 ctopsave = ctop;
380
381 if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
382 else { TopR = CTOP_DRING; TopC = CTOP_IRING; }
383
384 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
385 rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR );
386/*
387* Remove the next line when the BLACS combine operations support ring
388* topologies
389*/
390 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT );
391 }
392
393 DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
394 }
395 else
396 {
397 OpR = CBCAST;
398 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
399
400 if( ForceTop )
401 {
402 OpC = CCOMBINE;
403 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
404
405 rtopsave = rtop;
406 ctopsave = ctop;
407
408 if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
409 else { TopR = CTOP_DRING; TopC = CTOP_IRING; }
410
411 rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR );
412 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
413/*
414* Remove the next line when the BLACS combine operations support ring
415* topologies
416*/
417 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
418 }
419 DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
420 }
421
422 PB_CpsyrkAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N,
423 *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA),
424 ((char *)C), Ci, Cj, Cd );
425 }
426 else
427 {
428 if( notran )
429 {
430 OpR = CBCAST;
431 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
432
433 if( ForceTop )
434 {
435 OpC = CBCAST;
436 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
437
438 rtopsave = rtop;
439 ctopsave = ctop;
440/*
441* No clear winner for the ring topologies, so that if a ring topology is
442* already selected, keep it.
443*/
444 if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
445 ( rtop != CTOP_SRING ) )
446 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING );
447 if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
448 ( ctop != CTOP_SRING ) )
449 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
450 }
451
452 DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
453 }
454 else
455 {
456 OpC = CBCAST;
457 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
458
459 if( ForceTop )
460 {
461 OpR = CBCAST;
462 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
463
464 rtopsave = rtop;
465 ctopsave = ctop;
466/*
467* No clear winner for the ring topologies, so that if a ring topology is
468* already selected, keep it.
469*/
470 if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
471 ( rtop != CTOP_SRING ) )
472 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING );
473 if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
474 ( ctop != CTOP_SRING ) )
475 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
476 }
477
478 DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
479 }
480
481 PB_CpsyrkA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N,
482 *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA),
483 ((char *)C), Ci, Cj, Cd );
484 }
485/*
486* Restore the BLACS topologies when necessary.
487*/
488 if( ForceTop )
489 {
490 rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave );
491 ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave );
492 }
493/*
494* End of PZSYRK
495*/
496}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
#define CBRATIO
Definition pblas.h:37
#define C2F_CHAR(a)
Definition pblas.h:125
#define IMAG_PART
Definition pblas.h:140
char * F_CHAR_T
Definition pblas.h:122
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CTOP_SRING
Definition PBblacs.h:29
#define TOP_DEFAULT
Definition PBblacs.h:51
#define CCOMBINE
Definition PBblacs.h:24
#define ROW
Definition PBblacs.h:46
#define CBCAST
Definition PBblacs.h:23
#define TOP_SRING
Definition PBblacs.h:54
void Cblacs_gridinfo()
#define CTOP_IRING
Definition PBblacs.h:27
#define CTOP_DRING
Definition PBblacs.h:28
#define NOTRAN
Definition PBblas.h:44
#define TRAN
Definition PBblas.h:46
#define CBACKWARD
Definition PBblas.h:39
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define CFORWARD
Definition PBblas.h:38
#define CLOWER
Definition PBblas.h:25
#define pilaenv_
Definition PBpblas.h:44
#define pzsyrk_
Definition PBpblas.h:177
#define CTXT_
Definition PBtools.h:38
void PB_CpsyrkAC()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_CpsyrkA()
void PB_Cchkmat()
void PB_Cwarn()
char * PB_Ctop()
PBTYP_T * PB_Cztypeset()
void PB_Cplapad()
void PB_Cplascal()
#define RSRC_
Definition PBtools.h:45
#define TWO
Definition PBtools.h:65
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
#define ZERO
Definition PBtools.h:66
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define DNROC(n_, nb_, p_)
Definition PBtools.h:111
char type
Definition pblas.h:331
char * zero
Definition pblas.h:335