SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdsyrk_.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14#include "PBpblas.h"
15#include "PBtools.h"
16#include "PBblacs.h"
17#include "PBblas.h"
18
19#ifdef __STDC__
20void pdsyrk_( F_CHAR_T UPLO, F_CHAR_T TRANS, Int * N, Int * K,
21 double * ALPHA,
22 double * A, Int * IA, Int * JA, Int * DESCA,
23 double * BETA,
24 double * C, Int * IC, Int * JC, Int * DESCC )
25#else
26void pdsyrk_( UPLO, TRANS, N, K, ALPHA, A, IA, JA, DESCA, BETA,
27 C, IC, JC, DESCC )
28/*
29* .. Scalar Arguments ..
30*/
31 F_CHAR_T TRANS, UPLO;
32 Int * IA, * IC, * JA, * JC, * K, * N;
33 double * ALPHA, * BETA;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCC;
38 double * A, * C;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PDSYRK performs one of the symmetric rank k operations
46*
47* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
48*
49* or
50*
51* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
52*
53* where
54*
55* sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and,
56*
57* sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N',
58* A(IA:IA+K-1,JA:JA+N-1) otherwise.
59*
60* Alpha and beta are scalars, sub( C ) is an n by n symmetric
61* submatrix and sub( A ) is an n by k submatrix in the first case and a
62* k by n submatrix in the second case.
63*
64* Notes
65* =====
66*
67* A description vector is associated with each 2D block-cyclicly dis-
68* tributed matrix. This vector stores the information required to
69* establish the mapping between a matrix entry and its corresponding
70* process and memory location.
71*
72* In the following comments, the character _ should be read as
73* "of the distributed matrix". Let A be a generic term for any 2D
74* block cyclicly distributed matrix. Its description vector is DESC_A:
75*
76* NOTATION STORED IN EXPLANATION
77* ---------------- --------------- ------------------------------------
78* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
79* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
80* the NPROW x NPCOL BLACS process grid
81* A is distributed over. The context
82* itself is global, but the handle
83* (the integer value) may vary.
84* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
85* ted matrix A, M_A >= 0.
86* N_A (global) DESCA[ N_ ] The number of columns in the distri-
87* buted matrix A, N_A >= 0.
88* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
89* block of the matrix A, IMB_A > 0.
90* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
91* left block of the matrix A,
92* INB_A > 0.
93* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
94* bute the last M_A-IMB_A rows of A,
95* MB_A > 0.
96* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
97* bute the last N_A-INB_A columns of
98* A, NB_A > 0.
99* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
100* row of the matrix A is distributed,
101* NPROW > RSRC_A >= 0.
102* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
103* first column of A is distributed.
104* NPCOL > CSRC_A >= 0.
105* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
106* array storing the local blocks of
107* the distributed matrix A,
108* IF( Lc( 1, N_A ) > 0 )
109* LLD_A >= MAX( 1, Lr( 1, M_A ) )
110* ELSE
111* LLD_A >= 1.
112*
113* Let K be the number of rows of a matrix A starting at the global in-
114* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
115* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
116* receive if these K rows were distributed over NPROW processes. If K
117* is the number of columns of a matrix A starting at the global index
118* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
119* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
120* these K columns were distributed over NPCOL processes.
121*
122* The values of Lr() and Lc() may be determined via a call to the func-
123* tion PB_Cnumroc:
124* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
125* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
126*
127* Arguments
128* =========
129*
130* UPLO (global input) CHARACTER*1
131* On entry, UPLO specifies whether the local pieces of
132* the array C containing the upper or lower triangular part
133* of the symmetric submatrix sub( C ) are to be referenced as
134* follows:
135*
136* UPLO = 'U' or 'u' Only the local pieces corresponding to
137* the upper triangular part of the
138* symmetric submatrix sub( C ) are to be
139* referenced,
140*
141* UPLO = 'L' or 'l' Only the local pieces corresponding to
142* the lower triangular part of the
143* symmetric submatrix sub( C ) are to be
144* referenced.
145*
146* TRANS (global input) CHARACTER*1
147* On entry, TRANS specifies the operation to be performed as
148* follows:
149*
150* TRANS = 'N' or 'n'
151* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
152*
153* TRANS = 'T' or 't'
154* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ).
155*
156* TRANS = 'C' or 'c'
157* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ).
158*
159* N (global input) INTEGER
160* On entry, N specifies the order of the submatrix sub( C ).
161* N must be at least zero.
162*
163* K (global input) INTEGER
164* On entry, with TRANS = 'N' or 'n', K specifies the number of
165* columns of the submatrix sub( A ), and with TRANS = 'T' or
166* 't' or 'C' or 'c', K specifies the number of rows of the sub-
167* matrix sub( A ). K must be at least zero.
168*
169* ALPHA (global input) DOUBLE PRECISION
170* On entry, ALPHA specifies the scalar alpha. When ALPHA is
171* supplied as zero then the local entries of the array A
172* corresponding to the entries of the submatrix sub( A ) need
173* not be set on input.
174*
175* A (local input) DOUBLE PRECISION array
176* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
177* at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at
178* least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
179* contains the local entries of the matrix A.
180* Before entry with TRANS = 'N' or 'n', this array contains the
181* local entries corresponding to the entries of the n by k sub-
182* matrix sub( A ), otherwise the local entries corresponding to
183* the entries of the k by n submatrix sub( A ).
184*
185* IA (global input) INTEGER
186* On entry, IA specifies A's global row index, which points to
187* the beginning of the submatrix sub( A ).
188*
189* JA (global input) INTEGER
190* On entry, JA specifies A's global column index, which points
191* to the beginning of the submatrix sub( A ).
192*
193* DESCA (global and local input) INTEGER array
194* On entry, DESCA is an integer array of dimension DLEN_. This
195* is the array descriptor for the matrix A.
196*
197* BETA (global input) DOUBLE PRECISION
198* On entry, BETA specifies the scalar beta. When BETA is
199* supplied as zero then the local entries of the array C
200* corresponding to the entries of the submatrix sub( C ) need
201* not be set on input.
202*
203* C (local input/local output) DOUBLE PRECISION array
204* On entry, C is an array of dimension (LLD_C, Kc), where Kc is
205* at least Lc( 1, JC+N-1 ). Before entry, this array contains
206* the local entries of the matrix C.
207* Before entry with UPLO = 'U' or 'u', this array contains
208* the local entries corresponding to the upper triangular part
209* of the symmetric submatrix sub( C ), and the local entries
210* corresponding to the strictly lower triangular of sub( C )
211* are not referenced. On exit, the upper triangular part of
212* sub( C ) is overwritten by the upper triangular part of the
213* updated submatrix.
214* Before entry with UPLO = 'L' or 'l', this array contains
215* the local entries corresponding to the lower triangular part
216* of the symmetric submatrix sub( C ), and the local entries
217* corresponding to the strictly upper triangular of sub( C )
218* are not referenced. On exit, the lower triangular part of
219* sub( C ) is overwritten by the lower triangular part of the
220* updated submatrix.
221*
222* IC (global input) INTEGER
223* On entry, IC specifies C's global row index, which points to
224* the beginning of the submatrix sub( C ).
225*
226* JC (global input) INTEGER
227* On entry, JC specifies C's global column index, which points
228* to the beginning of the submatrix sub( C ).
229*
230* DESCC (global and local input) INTEGER array
231* On entry, DESCC is an integer array of dimension DLEN_. This
232* is the array descriptor for the matrix C.
233*
234* -- Written on April 1, 1998 by
235* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
236*
237* ---------------------------------------------------------------------
238*/
239/*
240* .. Local Scalars ..
241*/
242 char DirA, OpC, OpR, TopC, TopR, TranOp, UploC, ctop, ctopsave,
243 rtop, rtopsave;
244 Int Ai, Aj, ChooseAC, Ci, Cj, ForceTop, ctxt, info, mycol,
245 myrow, nb, notran, npcol, nprow, upper;
246 double Aest, ACest, tmp1, tmp2, tmp3, tmp4;
247 PBTYP_T * type;
248/*
249* .. Local Arrays ..
250*/
251 Int Ad[DLEN_], Cd[DLEN_];
252/* ..
253* .. Executable Statements ..
254*
255*/
256 upper = ( ( UploC = Mupcase( F2C_CHAR( UPLO )[0] ) ) == CUPPER );
257 notran = ( ( TranOp = Mupcase( F2C_CHAR( TRANS )[0] ) ) == CNOTRAN );
258 PB_CargFtoC( *IA, *JA, DESCA, &Ai, &Aj, Ad );
259 PB_CargFtoC( *IC, *JC, DESCC, &Ci, &Cj, Cd );
260#ifndef NO_ARGCHK
261/*
262* Test the input parameters
263*/
264 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
265 if( !( info = ( ( nprow == -1 ) ? -( 901 + CTXT_ ) : 0 ) ) )
266 {
267 if( ( !upper ) && ( UploC != CLOWER ) )
268 {
269 PB_Cwarn( ctxt, __LINE__, "PDSYRK", "Illegal UPLO = %c\n", UploC );
270 info = -1;
271 }
272 else if( ( !notran ) && ( TranOp != CTRAN ) && ( TranOp != CCOTRAN ) )
273 {
274 PB_Cwarn( ctxt, __LINE__, "PDSYRK", "Illegal TRANS = %c\n", TranOp );
275 info = -2;
276 }
277 if( notran )
278 PB_Cchkmat( ctxt, "PDSYRK", "A", *N, 3, *K, 4, Ai, Aj, Ad, 9,
279 &info );
280 else
281 PB_Cchkmat( ctxt, "PDSYRK", "A", *K, 4, *N, 3, Ai, Aj, Ad, 9,
282 &info );
283 PB_Cchkmat( ctxt, "PDSYRK", "C", *N, 3, *N, 3, Ci, Cj, Cd, 14,
284 &info );
285 }
286 if( info ) { PB_Cabort( ctxt, "PDSYRK", info ); return; }
287#endif
288/*
289* Quick return if possible
290*/
291 if( ( *N == 0 ) ||
292 ( ( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) ) &&
293 ( BETA[REAL_PART] == ONE ) ) )
294 return;
295/*
296* Get type structure
297*/
298 type = PB_Cdtypeset();
299/*
300* And when alpha or K is zero
301*/
302 if( ( ALPHA[REAL_PART] == ZERO ) || ( *K == 0 ) )
303 {
304 if( BETA[REAL_PART] == ZERO )
305 {
306 PB_Cplapad( type, &UploC, NOCONJG, *N, *N, type->zero, type->zero,
307 ((char *) C), Ci, Cj, Cd );
308 }
309 else
310 {
311 PB_Cplascal( type, &UploC, NOCONJG, *N, *N, ((char *) BETA),
312 ((char *) C), Ci, Cj, Cd );
313 }
314 return;
315 }
316/*
317* Start the operations
318*/
319#ifdef NO_ARGCHK
320 Cblacs_gridinfo( ( ctxt = Ad[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
321#endif
322/*
323* Algorithm selection is based on approximation of the communication volume
324* for distributed and aligned operands.
325*
326* ACest: both operands sub( A ) and sub( C ) are communicated (K >> N)
327* Aest : only sub( A ) is communicated (N >> K)
328*/
329 if( notran )
330 {
331 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp3 = DNROC( *K, Ad[NB_], npcol );
332 ACest = (double)(*N) *
333 ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp3 ) +
334 ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO :
335 CBRATIO * tmp1 / TWO ) );
336 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
337 tmp4 = DNROC( *N, Ad[MB_], nprow );
338 Aest = (double)(*K) *
339 ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp1 ) +
340 ( nprow == 1 ? ZERO : tmp2 ) + MAX( tmp2, tmp4 ) );
341 }
342 else
343 {
344 tmp2 = DNROC( *N, Cd[NB_], npcol ); tmp4 = DNROC( *K, Ad[MB_], nprow );
345 ACest = (double)(*N) *
346 ( ( ( ( Ad[CSRC_] == -1 ) || ( npcol == 1 ) ) ? ZERO : tmp4 ) +
347 ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO :
348 CBRATIO * tmp2 / TWO ) );
349 tmp1 = DNROC( *N, Cd[MB_], nprow ); tmp2 = DNROC( *N, Cd[NB_], npcol );
350 tmp3 = DNROC( *N, Ad[NB_], npcol );
351 Aest = (double)(*K) *
352 ( ( ( ( Ad[RSRC_] == -1 ) || ( nprow == 1 ) ) ? ZERO : tmp2 ) +
353 ( npcol == 1 ? ZERO : tmp1 ) + MAX( tmp1, tmp3 ) );
354 }
355/*
356* Shift a little the cross-over point between both algorithms.
357*/
358 ChooseAC = ( ( 1.3 * ACest ) <= Aest );
359/*
360* BLACS topologies are enforced iff N and K are strictly greater than the
361* logical block size returned by pilaenv_. Otherwise, it is assumed that the
362* routine calling this routine has already selected an adequate topology.
363*/
364 nb = pilaenv_( &ctxt, C2F_CHAR( &type->type ) );
365 ForceTop = ( ( *N > nb ) && ( *K > nb ) );
366
367 if( ChooseAC )
368 {
369 if( notran )
370 {
371 OpC = CBCAST;
372 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
373
374 if( ForceTop )
375 {
376 OpR = CCOMBINE;
377 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
378
379 rtopsave = rtop;
380 ctopsave = ctop;
381
382 if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
383 else { TopR = CTOP_DRING; TopC = CTOP_IRING; }
384
385 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
386 rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR );
387/*
388* Remove the next line when the BLACS combine operations support ring
389* topologies
390*/
391 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_DEFAULT );
392 }
393
394 DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
395 }
396 else
397 {
398 OpR = CBCAST;
399 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
400
401 if( ForceTop )
402 {
403 OpC = CCOMBINE;
404 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
405
406 rtopsave = rtop;
407 ctopsave = ctop;
408
409 if( upper ) { TopR = CTOP_IRING; TopC = CTOP_DRING; }
410 else { TopR = CTOP_DRING; TopC = CTOP_IRING; }
411
412 rtop = *PB_Ctop( &ctxt, &OpR, ROW, &TopR );
413 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, &TopC );
414/*
415* Remove the next line when the BLACS combine operations support ring
416* topologies
417*/
418 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_DEFAULT );
419 }
420 DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
421 }
422
423 PB_CpsyrkAC( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N,
424 *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA),
425 ((char *)C), Ci, Cj, Cd );
426 }
427 else
428 {
429 if( notran )
430 {
431 OpR = CBCAST;
432 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
433
434 if( ForceTop )
435 {
436 OpC = CBCAST;
437 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
438
439 rtopsave = rtop;
440 ctopsave = ctop;
441/*
442* No clear winner for the ring topologies, so that if a ring topology is
443* already selected, keep it.
444*/
445 if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
446 ( rtop != CTOP_SRING ) )
447 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING );
448 if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
449 ( ctop != CTOP_SRING ) )
450 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
451 }
452
453 DirA = ( rtop == CTOP_DRING ? CBACKWARD : CFORWARD );
454 }
455 else
456 {
457 OpC = CBCAST;
458 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_GET );
459
460 if( ForceTop )
461 {
462 OpR = CBCAST;
463 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_GET );
464
465 rtopsave = rtop;
466 ctopsave = ctop;
467/*
468* No clear winner for the ring topologies, so that if a ring topology is
469* already selected, keep it.
470*/
471 if( ( rtop != CTOP_DRING ) && ( rtop != CTOP_IRING ) &&
472 ( rtop != CTOP_SRING ) )
473 rtop = *PB_Ctop( &ctxt, &OpR, ROW, TOP_SRING );
474 if( ( ctop != CTOP_DRING ) && ( ctop != CTOP_IRING ) &&
475 ( ctop != CTOP_SRING ) )
476 ctop = *PB_Ctop( &ctxt, &OpC, COLUMN, TOP_SRING );
477 }
478
479 DirA = ( ctop == CTOP_DRING ? CBACKWARD : CFORWARD );
480 }
481
482 PB_CpsyrkA( type, &DirA, NOCONJG, &UploC, ( notran ? NOTRAN : TRAN ), *N,
483 *K, ((char *)ALPHA), ((char *)A), Ai, Aj, Ad, ((char *)BETA),
484 ((char *)C), Ci, Cj, Cd );
485 }
486/*
487* Restore the BLACS topologies when necessary.
488*/
489 if( ForceTop )
490 {
491 rtopsave = *PB_Ctop( &ctxt, &OpR, ROW, &rtopsave );
492 ctopsave = *PB_Ctop( &ctxt, &OpC, COLUMN, &ctopsave );
493 }
494/*
495* End of PDSYRK
496*/
497}
#define Int
Definition Bconfig.h:22
#define REAL_PART
Definition pblas.h:139
#define F2C_CHAR(a)
Definition pblas.h:124
#define CBRATIO
Definition pblas.h:37
#define C2F_CHAR(a)
Definition pblas.h:125
char * F_CHAR_T
Definition pblas.h:122
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CTOP_SRING
Definition PBblacs.h:29
#define TOP_DEFAULT
Definition PBblacs.h:51
#define CCOMBINE
Definition PBblacs.h:24
#define ROW
Definition PBblacs.h:46
#define CBCAST
Definition PBblacs.h:23
#define TOP_SRING
Definition PBblacs.h:54
void Cblacs_gridinfo()
#define CTOP_IRING
Definition PBblacs.h:27
#define CTOP_DRING
Definition PBblacs.h:28
#define NOTRAN
Definition PBblas.h:44
#define TRAN
Definition PBblas.h:46
#define CBACKWARD
Definition PBblas.h:39
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define CCOTRAN
Definition PBblas.h:22
#define CFORWARD
Definition PBblas.h:38
#define CLOWER
Definition PBblas.h:25
#define pdsyrk_
Definition PBpblas.h:174
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
void PB_CpsyrkAC()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
void PB_Cabort()
#define ONE
Definition PBtools.h:64
void PB_CpsyrkA()
void PB_Cchkmat()
void PB_Cwarn()
char * PB_Ctop()
void PB_Cplapad()
void PB_Cplascal()
#define RSRC_
Definition PBtools.h:45
#define TWO
Definition PBtools.h:65
void PB_CargFtoC()
#define CSRC_
Definition PBtools.h:46
#define ZERO
Definition PBtools.h:66
PBTYP_T * PB_Cdtypeset()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define DNROC(n_, nb_, p_)
Definition PBtools.h:111
char type
Definition pblas.h:331
char * zero
Definition pblas.h:335