SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CpsyrkA.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CpsyrkA( PBTYP_T * TYPE, char * DIRECA, char * CONJUG,
21 char * UPLO, char * TRANS, Int N, Int K, char * ALPHA,
22 char * A, Int IA, Int JA, Int * DESCA, char * BETA,
23 char * C, Int IC, Int JC, Int * DESCC )
24#else
25void PB_CpsyrkA( TYPE, DIRECA, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA,
26 JA, DESCA, BETA, C, IC, JC, DESCC )
27/*
28* .. Scalar Arguments ..
29*/
30 char * CONJUG, * DIRECA, * TRANS, * UPLO;
31 Int IA, IC, JA, JC, K, N;
32 char * ALPHA, * BETA;
33 PBTYP_T * TYPE;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCC;
38 char * A, * C;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PB_CpsyrkA performs one of the following symmetric or Hermitian rank
46* k operations
47*
48* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
49* or
50* sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ),
51* or
52* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
53* or
54* sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ),
55*
56* where
57*
58* sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and,
59*
60* sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N',
61* A(IA:IA+K-1,JA:JA+N-1) otherwise.
62*
63* Alpha and beta are scalars, sub( C ) is an n by n symmetric
64* or Hermitian submatrix and sub( A ) is an n by k submatrix in the
65* first case and a k by n submatrix in the second case.
66*
67* This is the outer-product algorithm using the logical LCM hybrid
68* and static blocking techniques. The submatrix operand sub( C ) stays
69* in place.
70*
71* Notes
72* =====
73*
74* A description vector is associated with each 2D block-cyclicly dis-
75* tributed matrix. This vector stores the information required to
76* establish the mapping between a matrix entry and its corresponding
77* process and memory location.
78*
79* In the following comments, the character _ should be read as
80* "of the distributed matrix". Let A be a generic term for any 2D
81* block cyclicly distributed matrix. Its description vector is DESC_A:
82*
83* NOTATION STORED IN EXPLANATION
84* ---------------- --------------- ------------------------------------
85* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
86* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
87* the NPROW x NPCOL BLACS process grid
88* A is distributed over. The context
89* itself is global, but the handle
90* (the integer value) may vary.
91* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
92* ted matrix A, M_A >= 0.
93* N_A (global) DESCA[ N_ ] The number of columns in the distri-
94* buted matrix A, N_A >= 0.
95* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
96* block of the matrix A, IMB_A > 0.
97* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
98* left block of the matrix A,
99* INB_A > 0.
100* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
101* bute the last M_A-IMB_A rows of A,
102* MB_A > 0.
103* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
104* bute the last N_A-INB_A columns of
105* A, NB_A > 0.
106* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
107* row of the matrix A is distributed,
108* NPROW > RSRC_A >= 0.
109* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
110* first column of A is distributed.
111* NPCOL > CSRC_A >= 0.
112* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
113* array storing the local blocks of
114* the distributed matrix A,
115* IF( Lc( 1, N_A ) > 0 )
116* LLD_A >= MAX( 1, Lr( 1, M_A ) )
117* ELSE
118* LLD_A >= 1.
119*
120* Let K be the number of rows of a matrix A starting at the global in-
121* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
122* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
123* receive if these K rows were distributed over NPROW processes. If K
124* is the number of columns of a matrix A starting at the global index
125* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
126* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
127* these K columns were distributed over NPCOL processes.
128*
129* The values of Lr() and Lc() may be determined via a call to the func-
130* tion PB_Cnumroc:
131* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
132* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
133*
134* Arguments
135* =========
136*
137* TYPE (local input) pointer to a PBTYP_T structure
138* On entry, TYPE is a pointer to a structure of type PBTYP_T,
139* that contains type information (See pblas.h).
140*
141* DIRECA (global input) pointer to CHAR
142* On entry, DIRECA specifies the direction in which the rows
143* or columns of sub( A ) should be looped over as follows:
144* DIRECA = 'F' or 'f' forward or increasing,
145* DIRECA = 'B' or 'b' backward or decreasing.
146*
147* CONJUG (global input) pointer to CHAR
148* On entry, CONJUG specifies whether sub( C ) is a symmetric or
149* Hermitian submatrix operand as follows:
150* CONJUG = 'N' or 'n' sub( C ) is symmetric,
151* CONJUG = 'Z' or 'z' sub( C ) is Hermitian.
152*
153* UPLO (global input) pointer to CHAR
154* On entry, UPLO specifies whether the local pieces of
155* the array C containing the upper or lower triangular part
156* of the submatrix sub( C ) are to be referenced as follows:
157* UPLO = 'U' or 'u' Only the local pieces corresponding to
158* the upper triangular part of the
159* submatrix sub( C ) are referenced,
160* UPLO = 'L' or 'l' Only the local pieces corresponding to
161* the lower triangular part of the
162* submatrix sub( C ) are referenced.
163*
164* TRANS (global input) pointer to CHAR
165* On entry, TRANS specifies the operation to be performed as
166* follows:
167*
168* TRANS = 'N' or 'n'
169* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
170* or
171* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
172* or
173* sub( C ) := alpha*sub( A )*conjg( sub( A )' ) +
174* beta*sub( C ),
175*
176* TRANS = 'T' or 't'
177* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
178* or
179* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
180*
181* TRANS = 'C' or 'c'
182* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
183* or
184* sub( C ) := alpha*conjg( sub( A )' )*sub( A ) +
185* beta*sub( C ).
186*
187* N (global input) INTEGER
188* On entry, N specifies the order of the submatrix sub( C ).
189* N must be at least zero.
190*
191* K (global input) INTEGER
192* On entry, with TRANS = 'N' or 'n', K specifies the number of
193* columns of the submatrix sub( A ), and with TRANS = 'T' or
194* 't' or 'C' or 'c', K specifies the number of rows of the sub-
195* matrix sub( A ). K must be at least zero.
196*
197* ALPHA (global input) pointer to CHAR
198* On entry, ALPHA specifies the scalar alpha. When ALPHA is
199* supplied as zero then the local entries of the array A
200* corresponding to the entries of the submatrix sub( A ) need
201* not be set on input.
202*
203* A (local input) pointer to CHAR
204* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
205* at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at
206* least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
207* contains the local entries of the matrix A.
208* Before entry with TRANS = 'N' or 'n', this array contains the
209* local entries corresponding to the entries of the n by k sub-
210* matrix sub( A ), otherwise the local entries corresponding to
211* the entries of the k by n submatrix sub( A ).
212*
213* IA (global input) INTEGER
214* On entry, IA specifies A's global row index, which points to
215* the beginning of the submatrix sub( A ).
216*
217* JA (global input) INTEGER
218* On entry, JA specifies A's global column index, which points
219* to the beginning of the submatrix sub( A ).
220*
221* DESCA (global and local input) INTEGER array
222* On entry, DESCA is an integer array of dimension DLEN_. This
223* is the array descriptor for the matrix A.
224*
225* BETA (global input) pointer to CHAR
226* On entry, BETA specifies the scalar beta. When BETA is
227* supplied as zero then the local entries of the array C
228* corresponding to the entries of the submatrix sub( C ) need
229* not be set on input.
230*
231* C (local input/local output) pointer to CHAR
232* On entry, C is an array of dimension (LLD_C, Kc), where Kc is
233* at least Lc( 1, JC+N-1 ). Before entry, this array contains
234* the local entries of the matrix C.
235* Before entry with UPLO = 'U' or 'u', this array contains
236* the local entries corresponding to the upper triangular part
237* of the symmetric or Hermitian submatrix sub( C ), and the
238* local entries corresponding to the strictly lower triangular
239* of sub( C ) are not referenced. On exit, the upper triangular
240* part of sub( C ) is overwritten by the upper triangular part
241* of the updated submatrix.
242* Before entry with UPLO = 'L' or 'l', this array contains
243* the local entries corresponding to the lower triangular part
244* of the symmetric or Hermitian submatrix sub( C ), and the
245* local entries corresponding to the strictly upper triangular
246* of sub( C ) are not referenced. On exit, the lower triangular
247* part of sub( C ) is overwritten by the lower triangular part
248* of the updated submatrix.
249* Note that the imaginary parts of the local entries corres-
250* ponding to the diagonal elements of sub( C ) need not be
251* set, they are assumed to be zero, and on exit they are set
252* to zero.
253*
254* IC (global input) INTEGER
255* On entry, IC specifies C's global row index, which points to
256* the beginning of the submatrix sub( C ).
257*
258* JC (global input) INTEGER
259* On entry, JC specifies C's global column index, which points
260* to the beginning of the submatrix sub( C ).
261*
262* DESCC (global and local input) INTEGER array
263* On entry, DESCC is an integer array of dimension DLEN_. This
264* is the array descriptor for the matrix C.
265*
266* -- Written on April 1, 1998 by
267* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
268*
269* ---------------------------------------------------------------------
270*/
271/*
272* .. Local Scalars ..
273*/
274 char * one;
275 Int AcurrocR, Afwd, AiD, AiR, AiiD, AiiR, AinbD, AinbR, Ainb1D,
276 Ainb1R, AisR, Ald, AmyprocD, AmyprocR, AnbD, AnbR, AnpR,
277 AnprocsD, AnprocsR, ArocD, ArocR, Arocs, AsrcR, Ccol, Cii,
278 Cimb1, Cinb1, Cjj, Clcmb, Cld, Clp, Clq, Cnq0, Cmb, Cmp,
279 Cmp0, Cnb, Cnq, Crow, WACfr, WACld, WACsum, WARfr, WARld,
280 WARsum, Wkbb=0, ctxt, k, kb, kbb, l, lb, ltmp, maxp, mycol,
281 myrow, notran, npcol, nprow, p=0, size, tmp, upper;
282 GEMM_T gemm;
283 TZSYR_T tzsyrk;
284/*
285* .. Local Arrays ..
286*/
287 Int Cd0[DLEN_], DBUFA[DLEN_], WACd0[DLEN_], WARd0[DLEN_];
288 char * Aptr = NULL, * Cptr = NULL, * WAC = NULL, * WAR = NULL;
289/* ..
290* .. Executable Statements ..
291*
292*/
293/*
294* sub( C ) := beta * sub( C )
295*/
296 PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC );
297/*
298* Retrieve process grid information
299*/
300 Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
301
302 size = TYPE->size; one = TYPE->one; gemm = TYPE->Fgemm;
303 kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
304/*
305* Compute descriptor Cd0 for sub( C )
306*/
307 PB_Cdescribe( N, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
308 &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 );
309
310 Cmp = PB_Cnumroc( N, 0, Cimb1, Cmb, myrow, Crow, nprow );
311 Cnq = PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
312
313 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
314 {
315 Cptr = Mptr( C, Cii, Cjj, Cld, size );
316 tzsyrk = ( ( Mupcase( CONJUG[0] ) == CNOCONJG ) ? PB_Ctzsyrk :
317 PB_Ctzherk );
318/*
319* Computational partitioning size is computed as the product of the logical
320* value returned by pilaenv_ and 2 * lcm( nprow, npcol ).
321*/
322 Clcmb = 2 * kb * PB_Clcm( ( Crow >= 0 ? nprow : 1 ),
323 ( Ccol >= 0 ? npcol : 1 ) );
324 }
325/*
326* Retrieve local information for sub( A )
327*/
328 if( ( notran = ( Mupcase( TRANS[0] ) == CNOTRAN ) ) != 0 )
329 {
330 AiR = JA; AnprocsR = npcol; AinbR = DESCA[INB_]; AnbR = DESCA[NB_];
331 AsrcR = DESCA[CSRC_];
332 }
333 else
334 {
335 AiR = IA; AnprocsR = nprow; AinbR = DESCA[IMB_]; AnbR = DESCA[MB_];
336 AsrcR = DESCA[RSRC_];
337 }
338/*
339* If sub( A ) only spans one process row or column, then there is no need to
340* pack the data.
341*/
342 if( !( PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, AnprocsR ) ) )
343 {
344/*
345* Replicate sub( A ) in process rows and columns spanned by sub( C ): WAC, WAR
346*/
347 if( notran )
348 {
349 PB_CInV( TYPE, NOCONJG, COLUMN, N, N, Cd0, K, A, IA, JA, DESCA,
350 COLUMN, &WAC, WACd0, &WACfr );
351 PB_CInV( TYPE, CONJUG, ROW, N, N, Cd0, K, WAC, 0, 0, WACd0,
352 COLUMN, &WAR, WARd0, &WARfr );
353 }
354 else
355 {
356 PB_CInV( TYPE, NOCONJG, ROW, N, N, Cd0, K, A, IA, JA, DESCA,
357 ROW, &WAR, WARd0, &WARfr );
358 PB_CInV( TYPE, CONJUG, COLUMN, N, N, Cd0, K, WAR, 0, 0, WARd0,
359 ROW, &WAC, WACd0, &WACfr );
360 }
361/*
362* Perform the local update if I own some data
363*/
364 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
365 {
366 WACld = WACd0[LLD_]; WARld = WARd0[LLD_];
367
368 if( Mupcase( UPLO[0] ) == CUPPER )
369 {
370 for( l = 0; l < N; l += Clcmb )
371 {
372 lb = N - l; lb = MIN( lb, Clcmb );
373 Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
374 Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
375 Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
376 if( Clp > 0 && Cnq0 > 0 )
377 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0, &K,
378 ALPHA, WAC, &WACld, Mptr( WAR, 0, Clq, WARld, size ),
379 &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ), &Cld );
380 PB_Cpsyr( TYPE, UPPER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld,
381 size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
382 Cptr, l, l, Cd0, tzsyrk );
383 }
384 }
385 else
386 {
387 for( l = 0; l < N; l += Clcmb )
388 {
389 lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) );
390 Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
391 Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
392 PB_Cpsyr( TYPE, LOWER, lb, K, ALPHA, Mptr( WAC, Clp, 0, WACld,
393 size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
394 Cptr, l, l, Cd0, tzsyrk );
395 Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
396 Cmp0 = Cmp - Clp;
397 Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
398 if( Cmp0 > 0 && Cnq0 > 0 )
399 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0,
400 &K, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld,
401 Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
402 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
403 }
404 }
405 }
406
407 if( WACfr ) free( WAC );
408 if( WARfr ) free( WAR );
409
410 return;
411 }
412/*
413* Otherwise sub( A ) spans more than one process row or columns -> LCM hybrid
414*/
415 Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
416 upper = ( Mupcase( UPLO [0] ) == CUPPER );
417
418 if( notran )
419 {
420 AiD = IA; AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_];
421 AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow;
422 PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR,
423 &AiiD, &AiiR, &ArocD, &ArocR );
424 }
425 else
426 {
427 AiD = JA; AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
428 AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol;
429 PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD,
430 &AiiR, &AiiD, &ArocR, &ArocD );
431 }
432 Ainb1D = PB_Cfirstnb( N, AiD, AinbD, AnbD );
433 Ainb1R = PB_Cfirstnb( K, AiR, AinbR, AnbR );
434 AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) );
435/*
436* When sub( A ) is not replicated and backward pass on sub( A ), find the
437* virtual process (p,p) owning the last row or column of sub( A ).
438*/
439 if( !( AisR ) && !( Afwd ) )
440 {
441 tmp = PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR );
442 p = MModSub( tmp, ArocR, AnprocsR );
443 }
444/*
445* Allocate work space in process rows and columns spanned by sub( C )
446*/
447 PB_COutV( TYPE, COLUMN, NOINIT, N, N, Cd0, kb, &WAC, WACd0, &WACfr,
448 &WACsum );
449 PB_COutV( TYPE, ROW, NOINIT, N, N, Cd0, kb, &WAR, WARd0, &WARfr,
450 &WARsum );
451/*
452* Loop over the virtual process grid induced by the rows or columns of sub( A )
453*/
454 maxp = ( AisR ? 1 : AnprocsR );
455 AcurrocR = ( AisR ? -1 : MModAdd( ArocR, p, AnprocsR ) );
456 AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR );
457
458 for( k = 0; k < K; k += kb )
459 {
460 kbb = K - k; kbb = MIN( kbb, kb );
461
462 while( Wkbb != kbb )
463 {
464/*
465* Ensure that the current virtual process (p,p) has something to contribute to
466* the replicated buffers WAC and WAR.
467*/
468 while( AnpR == 0 )
469 {
470 p = ( Afwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) );
471 AcurrocR = ( AisR ? -1 : MModAdd( ArocR, p, AnprocsR ) );
472 AnpR = PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR,
473 AnprocsR );
474 }
475/*
476* Current virtual process (p,p) has something, find out how many rows or
477* columns could be used: Arocs.
478*/
479 if( Wkbb == 0 ) { Arocs = ( AnpR < kbb ? AnpR : kbb ); }
480 else { Arocs = kbb - Wkbb; Arocs = MIN( Arocs, AnpR ); }
481/*
482* The current virtual process (p,p) has Arocs rows or columns of sub( A )
483* to contribute, replicate the data over sub( C ).
484*/
485 if( notran )
486 {
487 if( AisR || ( AmyprocR == AcurrocR ) )
488 { Aptr = Mptr( A, AiiD, AiiR, Ald, size ); AiiR += Arocs; }
489 PB_Cdescset( DBUFA, N, Arocs, Ainb1D, Arocs, AnbD, Arocs,
490 ArocD, AcurrocR, ctxt, Ald );
491/*
492* Replicate Arocs columns of sub( A ) in process columns spanned by sub( C )
493*/
494 PB_CInV2( TYPE, NOCONJG, COLUMN, N, N, Cd0, Arocs, Aptr, 0, 0,
495 DBUFA, COLUMN, WAC, Wkbb, WACd0 );
496 }
497 else
498 {
499 if( AisR || ( AmyprocR == AcurrocR ) )
500 { Aptr = Mptr( A, AiiR, AiiD, Ald, size ); AiiR += Arocs; }
501 PB_Cdescset( DBUFA, Arocs, N, Arocs, Ainb1D, Arocs, AnbD,
502 AcurrocR, ArocD, ctxt, Ald );
503/*
504* Replicate Arocs rows of sub( A ) in process rows spanned by sub( C )
505*/
506 PB_CInV2( TYPE, NOCONJG, ROW, N, N, Cd0, Arocs, Aptr, 0, 0,
507 DBUFA, ROW, WAR, Wkbb, WARd0 );
508 }
509/*
510* Arocs rows or columns of sub( A ) have been replicated over sub( C ),
511* update the number of diagonals in this virtual process as well as the
512* number of rows or columns of sub( A ) that are in WAR or WAC.
513*/
514 AnpR -= Arocs;
515 Wkbb += Arocs;
516 }
517
518 if( notran )
519 {
520/*
521* WAR := WAC'
522*/
523 PB_CInV2( TYPE, CONJUG, ROW, N, N, Cd0, kbb, WAC, 0, 0, WACd0,
524 COLUMN, WAR, 0, WARd0 );
525 }
526 else
527 {
528/*
529* WAC := WAR'
530*/
531 PB_CInV2( TYPE, CONJUG, COLUMN, N, N, Cd0, kbb, WAR, 0, 0, WARd0,
532 ROW, WAC, 0, WACd0 );
533 }
534/*
535* Perform the local update if I own some data
536*/
537 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
538 {
539 WACld = WACd0[LLD_]; WARld = WARd0[LLD_];
540
541 if( upper )
542 {
543 for( l = 0; l < N; l += Clcmb )
544 {
545 lb = N - l; lb = MIN( lb, Clcmb );
546 Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
547 Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
548 Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
549 if( Clp > 0 && Cnq0 > 0 )
550 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Clp, &Cnq0,
551 &kbb, ALPHA, WAC, &WACld, Mptr( WAR, 0, Clq, WARld,
552 size ), &WARld, one, Mptr( Cptr, 0, Clq, Cld, size ),
553 &Cld );
554 PB_Cpsyr( TYPE, UPPER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld,
555 size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
556 Cptr, l, l, Cd0, tzsyrk );
557 }
558 }
559 else
560 {
561 for( l = 0; l < N; l += Clcmb )
562 {
563 lb = N - l; ltmp = l + ( lb = MIN( lb, Clcmb ) );
564 Clp = PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
565 Clq = PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
566 PB_Cpsyr( TYPE, LOWER, lb, kbb, ALPHA, Mptr( WAC, Clp, 0, WACld,
567 size ), WACld, Mptr( WAR, 0, Clq, WARld, size ), WARld,
568 Cptr, l, l, Cd0, tzsyrk );
569 Clp = PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
570 Cmp0 = Cmp - Clp;
571 Cnq0 = PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
572 if( Cmp0 > 0 && Cnq0 > 0 )
573 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( NOTRAN ), &Cmp0, &Cnq0,
574 &kbb, ALPHA, Mptr( WAC, Clp, 0, WACld, size ), &WACld,
575 Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
576 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
577 }
578 }
579 }
580
581 Wkbb = 0;
582 }
583
584 if( WACfr ) free( WAC );
585 if( WARfr ) free( WAR );
586/*
587* End of PB_CpsyrkA
588*/
589}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* GEMM_T)()
Definition pblas.h:317
#define C2F_CHAR(a)
Definition pblas.h:125
void(* TZSYR_T)()
Definition pblas.h:429
#define COLUMN
Definition PBblacs.h:45
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define NOTRAN
Definition PBblas.h:44
#define NOINIT
Definition PBblas.h:62
#define CNOCONJG
Definition PBblas.h:19
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define LOWER
Definition PBblas.h:51
#define UPPER
Definition PBblas.h:52
#define CFORWARD
Definition PBblas.h:38
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MB_
Definition PBtools.h:43
void PB_Cpsyr()
void PB_CInV2()
void PB_Ctzherk()
void PB_Cinfog2l()
void PB_CpsyrkA()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
void PB_CInV()
void PB_Cplascal()
#define RSRC_
Definition PBtools.h:45
void PB_Cdescset()
void PB_COutV()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define MModAdd(I1, I2, d)
Definition PBtools.h:97
#define INB_
Definition PBtools.h:42
#define MModSub1(I, d)
Definition PBtools.h:105
#define CSRC_
Definition PBtools.h:46
Int PB_Clcm()
void PB_Ctzsyrk()
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
Int PB_Cspan()
void PB_Cdescribe()
#define TYPE
Definition clamov.c:7