SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CpsyrkAC.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CpsyrkAC( PBTYP_T * TYPE, char * DIRECA, char * CONJUG,
21 char * UPLO, char * TRANS, Int N, Int K, char * ALPHA,
22 char * A, Int IA, Int JA, Int * DESCA, char * BETA,
23 char * C, Int IC, Int JC, Int * DESCC )
24#else
25void PB_CpsyrkAC( TYPE, DIRECA, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA,
26 JA, DESCA, BETA, C, IC, JC, DESCC )
27/*
28* .. Scalar Arguments ..
29*/
30 char * CONJUG, * DIRECA, * TRANS, * UPLO;
31 Int IA, IC, JA, JC, K, N;
32 char * ALPHA, * BETA;
33 PBTYP_T * TYPE;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCC;
38 char * A, * C;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PB_CpsyrkAC performs one of the following symmetric or Hermitian rank
46* k operations
47*
48* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
49* or
50* sub( C ) := alpha*sub( A )*conjg( sub( A )' ) + beta*sub( C ),
51* or
52* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
53* or
54* sub( C ) := alpha*conjg( sub( A )' )*sub( A ) + beta*sub( C ),
55*
56* where
57*
58* sub( C ) denotes C(IC:IC+N-1,JC:JC+N-1), and,
59*
60* sub( A ) denotes A(IA:IA+N-1,JA:JA+K-1) if TRANS = 'N',
61* A(IA:IA+K-1,JA:JA+N-1) otherwise.
62*
63* Alpha and beta are scalars, sub( C ) is an n by n symmetric
64* or Hermitian submatrix and sub( A ) is an n by k submatrix in the
65* first case and a k by n submatrix in the second case.
66*
67* This is the outer-product algorithm using the logical aggregation
68* blocking technique.
69*
70* Notes
71* =====
72*
73* A description vector is associated with each 2D block-cyclicly dis-
74* tributed matrix. This vector stores the information required to
75* establish the mapping between a matrix entry and its corresponding
76* process and memory location.
77*
78* In the following comments, the character _ should be read as
79* "of the distributed matrix". Let A be a generic term for any 2D
80* block cyclicly distributed matrix. Its description vector is DESC_A:
81*
82* NOTATION STORED IN EXPLANATION
83* ---------------- --------------- ------------------------------------
84* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
85* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
86* the NPROW x NPCOL BLACS process grid
87* A is distributed over. The context
88* itself is global, but the handle
89* (the integer value) may vary.
90* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
91* ted matrix A, M_A >= 0.
92* N_A (global) DESCA[ N_ ] The number of columns in the distri-
93* buted matrix A, N_A >= 0.
94* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
95* block of the matrix A, IMB_A > 0.
96* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
97* left block of the matrix A,
98* INB_A > 0.
99* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
100* bute the last M_A-IMB_A rows of A,
101* MB_A > 0.
102* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
103* bute the last N_A-INB_A columns of
104* A, NB_A > 0.
105* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
106* row of the matrix A is distributed,
107* NPROW > RSRC_A >= 0.
108* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
109* first column of A is distributed.
110* NPCOL > CSRC_A >= 0.
111* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
112* array storing the local blocks of
113* the distributed matrix A,
114* IF( Lc( 1, N_A ) > 0 )
115* LLD_A >= MAX( 1, Lr( 1, M_A ) )
116* ELSE
117* LLD_A >= 1.
118*
119* Let K be the number of rows of a matrix A starting at the global in-
120* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
121* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
122* receive if these K rows were distributed over NPROW processes. If K
123* is the number of columns of a matrix A starting at the global index
124* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
125* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
126* these K columns were distributed over NPCOL processes.
127*
128* The values of Lr() and Lc() may be determined via a call to the func-
129* tion PB_Cnumroc:
130* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
131* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
132*
133* Arguments
134* =========
135*
136* TYPE (local input) pointer to a PBTYP_T structure
137* On entry, TYPE is a pointer to a structure of type PBTYP_T,
138* that contains type information (See pblas.h).
139*
140* DIRECA (global input) pointer to CHAR
141* On entry, DIRECA specifies the direction in which the rows
142* or columns of sub( A ) and sub( C ) should be looped over as
143* follows:
144* DIRECA = 'F' or 'f' forward or increasing,
145* DIRECA = 'B' or 'b' backward or decreasing.
146*
147* CONJUG (global input) pointer to CHAR
148* On entry, CONJUG specifies whether sub( C ) is a symmetric or
149* Hermitian submatrix operand as follows:
150* CONJUG = 'N' or 'n' sub( C ) is symmetric,
151* CONJUG = 'Z' or 'z' sub( C ) is Hermitian.
152*
153* UPLO (global input) pointer to CHAR
154* On entry, UPLO specifies whether the local pieces of
155* the array C containing the upper or lower triangular part
156* of the submatrix sub( C ) are to be referenced as follows:
157* UPLO = 'U' or 'u' Only the local pieces corresponding to
158* the upper triangular part of the
159* submatrix sub( C ) are referenced,
160* UPLO = 'L' or 'l' Only the local pieces corresponding to
161* the lower triangular part of the
162* submatrix sub( C ) are referenced.
163*
164* TRANS (global input) pointer to CHAR
165* On entry, TRANS specifies the operation to be performed as
166* follows:
167*
168* TRANS = 'N' or 'n'
169* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
170* or
171* sub( C ) := alpha*sub( A )*sub( A )' + beta*sub( C ),
172* or
173* sub( C ) := alpha*sub( A )*conjg( sub( A )' ) +
174* beta*sub( C ),
175*
176* TRANS = 'T' or 't'
177* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
178* or
179* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
180*
181* TRANS = 'C' or 'c'
182* sub( C ) := alpha*sub( A )'*sub( A ) + beta*sub( C ),
183* or
184* sub( C ) := alpha*conjg( sub( A )' )*sub( A ) +
185* beta*sub( C ).
186*
187* N (global input) INTEGER
188* On entry, N specifies the order of the submatrix sub( C ).
189* N must be at least zero.
190*
191* K (global input) INTEGER
192* On entry, with TRANS = 'N' or 'n', K specifies the number of
193* columns of the submatrix sub( A ), and with TRANS = 'T' or
194* 't' or 'C' or 'c', K specifies the number of rows of the sub-
195* matrix sub( A ). K must be at least zero.
196*
197* ALPHA (global input) pointer to CHAR
198* On entry, ALPHA specifies the scalar alpha. When ALPHA is
199* supplied as zero then the local entries of the array A
200* corresponding to the entries of the submatrix sub( A ) need
201* not be set on input.
202*
203* A (local input) pointer to CHAR
204* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
205* at least Lc( 1, JA+K-1 ) when TRANS = 'N' or 'n', and is at
206* least Lc( 1, JA+N-1 ) otherwise. Before entry, this array
207* contains the local entries of the matrix A.
208* Before entry with TRANS = 'N' or 'n', this array contains the
209* local entries corresponding to the entries of the n by k sub-
210* matrix sub( A ), otherwise the local entries corresponding to
211* the entries of the k by n submatrix sub( A ).
212*
213* IA (global input) INTEGER
214* On entry, IA specifies A's global row index, which points to
215* the beginning of the submatrix sub( A ).
216*
217* JA (global input) INTEGER
218* On entry, JA specifies A's global column index, which points
219* to the beginning of the submatrix sub( A ).
220*
221* DESCA (global and local input) INTEGER array
222* On entry, DESCA is an integer array of dimension DLEN_. This
223* is the array descriptor for the matrix A.
224*
225* BETA (global input) pointer to CHAR
226* On entry, BETA specifies the scalar beta. When BETA is
227* supplied as zero then the local entries of the array C
228* corresponding to the entries of the submatrix sub( C ) need
229* not be set on input.
230*
231* C (local input/local output) pointer to CHAR
232* On entry, C is an array of dimension (LLD_C, Kc), where Kc is
233* at least Lc( 1, JC+N-1 ). Before entry, this array contains
234* the local entries of the matrix C.
235* Before entry with UPLO = 'U' or 'u', this array contains
236* the local entries corresponding to the upper triangular part
237* of the symmetric or Hermitian submatrix sub( C ), and the
238* local entries corresponding to the strictly lower triangular
239* of sub( C ) are not referenced. On exit, the upper triangular
240* part of sub( C ) is overwritten by the upper triangular part
241* of the updated submatrix.
242* Before entry with UPLO = 'L' or 'l', this array contains
243* the local entries corresponding to the lower triangular part
244* of the symmetric or Hermitian submatrix sub( C ), and the
245* local entries corresponding to the strictly upper triangular
246* of sub( C ) are not referenced. On exit, the lower triangular
247* part of sub( C ) is overwritten by the lower triangular part
248* of the updated submatrix.
249* Note that the imaginary parts of the local entries corres-
250* ponding to the diagonal elements of sub( C ) need not be
251* set, they are assumed to be zero, and on exit they are set
252* to zero.
253*
254* IC (global input) INTEGER
255* On entry, IC specifies C's global row index, which points to
256* the beginning of the submatrix sub( C ).
257*
258* JC (global input) INTEGER
259* On entry, JC specifies C's global column index, which points
260* to the beginning of the submatrix sub( C ).
261*
262* DESCC (global and local input) INTEGER array
263* On entry, DESCC is an integer array of dimension DLEN_. This
264* is the array descriptor for the matrix C.
265*
266* -- Written on April 1, 1998 by
267* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
268*
269* ---------------------------------------------------------------------
270*/
271/*
272* .. Local Scalars ..
273*/
274 char GatherDir, ScatterDir, * one, top, tran, * zero;
275 Int Acol, Acurcol, Acurimb1, Acurinb1, Acurrow, Afr, Aii, Aimb,
276 Aimb1, Ainb, Ainb1, Ajj, Ald, Am, Amb, Amp, Amp0, An, Anb,
277 Anq, Anq0, Arow, Ccsrc, Cimb, Cinb, Cmb, Cnb, Crsrc, WAfr,
278 WCfr, WCsum, conjg, ctxt, fwd, k, kb, kbb, kend, kstart,
279 kstep, ktmp, mycol, myrow, notran, npcol, nprow, size, upper;
280 GEMM_T gemm;
281 GSUM2D_T gsum2d;
282/*
283* .. Local Arrays ..
284*/
285 Int Ad0[DLEN_], DBUFA[DLEN_], WAd[DLEN_], WCd[DLEN_];
286 char * Aptr = NULL, * Aptr0 = NULL, * WA = NULL, * WC = NULL;
287/* ..
288* .. Executable Statements ..
289*
290*/
291/*
292* sub( C ) = beta * sub( C )
293*/
294 PB_Cplascal( TYPE, UPLO, CONJUG, N, N, BETA, C, IC, JC, DESCC );
295/*
296* Retrieve process grid information
297*/
298 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
299
300 fwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
301 conjg = ( Mupcase( CONJUG[0] ) == CCONJG );
302 upper = ( Mupcase( UPLO [0] ) == CUPPER );
303 notran = ( Mupcase( TRANS [0] ) == CNOTRAN );
304 tran = ( conjg ? CCOTRAN : CTRAN );
305
306 size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
307 gsum2d = TYPE->Cgsum2d; gemm = TYPE->Fgemm;
308/*
309* Figure out the loop bounds accordingly to DIRECA
310*/
311 kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
312 if( fwd )
313 {
314 kstart = 0; kend = ( ( N - 1 ) / kb + 1 ) * kb; kstep = kb;
315 GatherDir = CFORWARD; ScatterDir = CBACKWARD;
316 }
317 else
318 {
319 kstart = ( ( N - 1 ) / kb ) * kb; kend = kstep = -kb;
320 GatherDir = CBACKWARD; ScatterDir = CFORWARD;
321 }
322/*
323* Compute local information for A
324*/
325 if( notran ) { Am = N; An = K; } else { Am = K; An = N; }
326 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
327 &Arow, &Acol );
328 Aimb = DESCA[IMB_]; Ainb = DESCA[INB_];
329 Amb = DESCA[MB_ ]; Anb = DESCA[NB_ ]; Ald = DESCA[LLD_];
330
331 Aimb1 = PB_Cfirstnb( Am, IA, Aimb, Amb );
332 Amp0 = PB_Cnumroc( Am, 0, Aimb1, Amb, myrow, Arow, nprow );
333 Ainb1 = PB_Cfirstnb( An, JA, Ainb, Anb );
334 Anq0 = PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
335 if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 = Mptr( A, Aii, Ajj, Ald, size );
336
337 if( notran )
338 {
339 top = *PB_Ctop( &ctxt, COMBINE, ROW, TOP_GET );
340 Cinb = DESCC[INB_]; Cnb = DESCC[NB_]; Ccsrc = DESCC[CSRC_];
341
342 if( upper )
343 {
344 for( k = kstart; k != kend; k += kstep )
345 {
346 kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb;
347/*
348* Accumulate A( IA+k:IA+k+kbb-1, JA:JA+K-1 )
349*/
350 PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, An, A, IA+k, JA, DESCA,
351 ROW, &Aptr, DBUFA, &Afr );
352/*
353* Replicate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) over A( IA:IA+k+kbb-1, JA:JA+K-1 )
354*/
355 PB_Cdescset( Ad0, ktmp, An, Aimb1, Ainb1, Amb, Anb, Arow, Acol,
356 ctxt, Ald );
357 PB_CInV( TYPE, NOCONJG, ROW, ktmp, An, Ad0, kbb, Aptr, 0, 0, DBUFA,
358 ROW, &WA, WAd, &WAfr );
359/*
360* WC := A( IA:IA+k+kbb-1, JA:JA+K-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+K-1 )'
361*/
362 PB_COutV( TYPE, COLUMN, INIT, ktmp, An, Ad0, kbb, &WC, WCd, &WCfr,
363 &WCsum );
364 Amp = PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
365 if( ( Amp > 0 ) && ( Anq0 > 0 ) )
366 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Amp, &kbb, &Anq0,
367 ALPHA, Aptr0, &Ald, WA, &WAd[LLD_], zero, WC, &WCd[LLD_] );
368 if( WAfr ) free( WA );
369 if( Afr ) free( Aptr );
370
371 if( WCsum )
372 {
373 WCd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : ktmp - 1 ), Cinb,
374 Cnb, Ccsrc, Ccsrc, npcol );
375 if( Amp > 0 )
376 gsum2d( ctxt, ROW, &top, Amp, kbb, WC, WCd[LLD_], myrow,
377 WCd[CSRC_] );
378 }
379/*
380* Zero lower triangle of WC( k:k+kbb-1, 0:kbb-1 )
381*/
382 if( conjg )
383 PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WC,
384 k, 0, WCd );
385 else if( kbb > 1 )
386 PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WC,
387 k+1, 0, WCd );
388/*
389* Add WC to C( IC:IC+k+kbb-1, JC+k:JC+k+kbb-1 )
390*/
391 PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WC, 0, 0, WCd, COLUMN,
392 one, C, IC, JC+k, DESCC, COLUMN );
393 if( WCfr ) free( WC );
394 }
395 }
396 else
397 {
398 for( k = kstart; k != kend; k += kstep )
399 {
400 ktmp = N - k; kbb = MIN( ktmp, kb );
401/*
402* Accumulate A( IA+k:IA+k+kbb-1, JA:JA+K-1 )
403*/
404 PB_CGatherV( TYPE, REUSE, &GatherDir, kbb, An, A, IA+k, JA, DESCA,
405 ROW, &Aptr, DBUFA, &Afr );
406/*
407* Replicate A( IA+k:IA+k+kbb-1, JA:JA+K-1 ) over A( IA+k:IA+N-1, JA:JA+K-1 )
408*/
409 Acurimb1 = PB_Cfirstnb( ktmp, IA+k, Aimb, Amb );
410 Acurrow = PB_Cindxg2p( k, Aimb1, Amb, Arow, Arow, nprow );
411 PB_Cdescset( Ad0, ktmp, An, Acurimb1, Ainb1, Amb, Anb, Acurrow,
412 Acol, ctxt, Ald );
413 PB_CInV( TYPE, NOCONJG, ROW, ktmp, An, Ad0, kbb, Aptr, 0, 0, DBUFA,
414 ROW, &WA, WAd, &WAfr );
415/*
416* WC := A( IA+k:IA+N-1, JA:JA+K-1 ) * A( IA+k:IA+k+kbb-1, JA:JA+K-1 )'
417*/
418 PB_COutV( TYPE, COLUMN, INIT, ktmp, An, Ad0, kbb, &WC, WCd, &WCfr,
419 &WCsum );
420 Amp = PB_Cnumroc( ktmp, k, Aimb1, Amb, myrow, Arow, nprow );
421 if( ( Amp > 0 ) && ( Anq0 > 0 ) )
422 gemm( C2F_CHAR( NOTRAN ), C2F_CHAR( &tran ), &Amp, &kbb, &Anq0,
423 ALPHA, Mptr( Aptr0, Amp0-Amp, 0, Ald, size ), &Ald, WA,
424 &WAd[LLD_], zero, WC, &WCd[LLD_] );
425 if( WAfr ) free( WA );
426 if( Afr ) free( Aptr );
427
428 if( WCsum )
429 {
430 WCd[CSRC_] = PB_Cindxg2p( JC + ( fwd ? k : k + kbb - 1 ), Cinb,
431 Cnb, Ccsrc, Ccsrc, npcol );
432 if( Amp > 0 )
433 gsum2d( ctxt, ROW, &top, Amp, kbb, WC, WCd[LLD_], myrow,
434 WCd[CSRC_] );
435 }
436/*
437* Zero upper triangle of WC
438*/
439 if( conjg )
440 PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WC,
441 0, 0, WCd );
442 else if( kbb > 1 )
443 PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WC,
444 0, 1, WCd );
445/*
446* Add WC to C( IC+k:IC+N-1, JC+k:JC+k+kbb-1 )
447*/
448 PB_CScatterV( TYPE, &ScatterDir, ktmp, kbb, WC, 0, 0, WCd, COLUMN,
449 one, C, IC+k, JC+k, DESCC, COLUMN );
450 if( WCfr ) free( WC );
451 }
452 }
453 }
454 else
455 {
456 top = *PB_Ctop( &ctxt, COMBINE, COLUMN, TOP_GET );
457 Cimb = DESCC[IMB_]; Cmb = DESCC[MB_]; Crsrc = DESCC[RSRC_];
458
459 if( upper )
460 {
461 for( k = kstart; k != kend; k += kstep )
462 {
463 ktmp = N - k; kbb = MIN( ktmp, kb );
464/*
465* Accumulate A( IA:IA+K-1, JA+k:JA+k+kbb-1 )
466*/
467 PB_CGatherV( TYPE, REUSE, &GatherDir, Am, kbb, A, IA, JA+k, DESCA,
468 COLUMN, &Aptr, DBUFA, &Afr );
469/*
470* Replicate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) over A( IA:IA+K-1, JA+k:JA+N-1 )
471*/
472 Acurinb1 = PB_Cfirstnb( ktmp, JA+k, Ainb, Anb );
473 Acurcol = PB_Cindxg2p( k, Ainb1, Anb, Acol, Acol, npcol );
474 PB_Cdescset( Ad0, Am, ktmp, Aimb1, Acurinb1, Amb, Anb, Arow,
475 Acurcol, ctxt, Ald );
476 PB_CInV( TYPE, NOCONJG, COLUMN, Am, ktmp, Ad0, kbb, Aptr, 0, 0,
477 DBUFA, COLUMN, &WA, WAd, &WAfr );
478/*
479* WC := A( IA:IA+K-1, JA+k:JA+k+kbb-1 )' * A( IA:IA+K-1,JA+k:JA+N-1 )
480*/
481 PB_COutV( TYPE, ROW, INIT, Am, ktmp, Ad0, kbb, &WC, WCd, &WCfr,
482 &WCsum );
483 Anq = PB_Cnumroc( ktmp, k, Ainb1, Anb, mycol, Acol, npcol );
484 if( ( Anq > 0 ) && ( Amp0 > 0 ) )
485 gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Anq, &Amp0,
486 ALPHA, WA, &WAd[LLD_], Mptr( Aptr0, 0, Anq0-Anq, Ald,
487 size ), &Ald, zero, WC, &WCd[LLD_] );
488 if( WAfr ) free( WA );
489 if( Afr ) free( Aptr );
490
491 if( WCsum )
492 {
493 WCd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : k + kbb - 1 ), Cimb,
494 Cmb, Crsrc, Crsrc, nprow );
495 if( Anq > 0 )
496 gsum2d( ctxt, COLUMN, &top, kbb, Anq, WC, WCd[LLD_],
497 WCd[RSRC_], mycol );
498 }
499/*
500* Zero lower triangle of WC
501*/
502 if( conjg )
503 PB_Cplapad( TYPE, LOWER, CONJG, kbb, kbb, zero, zero, WC,
504 0, 0, WCd );
505 else if( kbb > 1 )
506 PB_Cplapad( TYPE, LOWER, NOCONJG, kbb-1, kbb-1, zero, zero, WC,
507 1, 0, WCd );
508/*
509* Add WC to C( IC+k:IC+k+kbb-1, JC+k:JC+N-1 )
510*/
511 PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WC, 0, 0, WCd, ROW, one,
512 C, IC+k, JC+k, DESCC, ROW );
513 if( WCfr ) free( WC );
514 }
515 }
516 else
517 {
518 for( k = kstart; k != kend; k += kstep )
519 {
520 kbb = N - k; kbb = MIN( kbb, kb ); ktmp = k + kbb;
521/*
522* Accumulate A( IA:IA+K-1, JA+k:JA+k+kbb-1 )
523*/
524 PB_CGatherV( TYPE, REUSE, &GatherDir, Am, kbb, A, IA, JA+k, DESCA,
525 COLUMN, &Aptr, DBUFA, &Afr );
526/*
527* Replicate A( IA:IA+K-1, JA+k:JA+k+kbb-1 ) over A( IA:IA+K-1, JA:JA+k+kbb-1 )
528*/
529 PB_Cdescset( Ad0, Am, ktmp, Aimb1, Ainb1, Amb, Anb, Arow, Acol,
530 ctxt, Ald );
531 PB_CInV( TYPE, NOCONJG, COLUMN, Am, ktmp, Ad0, kbb, Aptr, 0, 0,
532 DBUFA, COLUMN, &WA, WAd, &WAfr );
533/*
534* WC := A( IA:IA+K-1, JA+k:JA+k+kbb-1 )' * A( IA:IA+K-1, JA:JA+k+kbb-1 )
535*/
536 PB_COutV( TYPE, ROW, INIT, Am, ktmp, Ad0, kbb, &WC, WCd, &WCfr,
537 &WCsum );
538 Anq = PB_Cnumroc( ktmp, 0, Ainb1, Anb, mycol, Acol, npcol );
539 if( ( Anq > 0 ) && ( Amp0 > 0 ) )
540 gemm( C2F_CHAR( &tran ), C2F_CHAR( NOTRAN ), &kbb, &Anq, &Amp0,
541 ALPHA, WA, &WAd[LLD_], Aptr0, &Ald, zero, WC, &WCd[LLD_] );
542 if( WAfr ) free( WA );
543 if( Afr ) free( Aptr );
544
545 if( WCsum )
546 {
547 WCd[RSRC_] = PB_Cindxg2p( IC + ( fwd ? k : ktmp - 1 ), Cimb,
548 Cmb, Crsrc, Crsrc, nprow );
549 if( Anq > 0 )
550 gsum2d( ctxt, COLUMN, &top, kbb, Anq, WC, WCd[LLD_],
551 WCd[RSRC_], mycol );
552 }
553/*
554* Zero upper triangle of WC( 0:kbb-1, k:k+kbb-1 )
555*/
556 if( conjg )
557 PB_Cplapad( TYPE, UPPER, CONJG, kbb, kbb, zero, zero, WC,
558 0, k, WCd );
559 else if( kbb > 1 )
560 PB_Cplapad( TYPE, UPPER, NOCONJG, kbb-1, kbb-1, zero, zero, WC,
561 0, k+1, WCd );
562/*
563* Add WC to C( IC+k:IC+k+kbb-1, JC:JC+k+kbb-1 )
564*/
565 PB_CScatterV( TYPE, &ScatterDir, kbb, ktmp, WC, 0, 0, WCd, ROW, one,
566 C, IC+k, JC, DESCC, ROW );
567 if( WCfr ) free( WC );
568 }
569 }
570 }
571/*
572* End of PB_CpsyrkAC
573*/
574}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* GEMM_T)()
Definition pblas.h:317
void(* GSUM2D_T)()
Definition pblas.h:286
#define C2F_CHAR(a)
Definition pblas.h:125
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define COMBINE
Definition PBblacs.h:49
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define NOTRAN
Definition PBblas.h:44
#define CONJG
Definition PBblas.h:47
#define CCONJG
Definition PBblas.h:21
#define CBACKWARD
Definition PBblas.h:39
#define NOCONJG
Definition PBblas.h:45
#define CUPPER
Definition PBblas.h:26
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define LOWER
Definition PBblas.h:51
#define REUSE
Definition PBblas.h:67
#define CCOTRAN
Definition PBblas.h:22
#define INIT
Definition PBblas.h:61
#define UPPER
Definition PBblas.h:52
#define CFORWARD
Definition PBblas.h:38
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
void PB_CpsyrkAC()
Int PB_Cfirstnb()
#define MB_
Definition PBtools.h:43
void PB_Cinfog2l()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
void PB_CGatherV()
Int PB_Cnumroc()
char * PB_Ctop()
void PB_CInV()
void PB_CScatterV()
void PB_Cplapad()
void PB_Cplascal()
#define RSRC_
Definition PBtools.h:45
void PB_Cdescset()
void PB_COutV()
#define INB_
Definition PBtools.h:42
#define CSRC_
Definition PBtools.h:46
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
#define TYPE
Definition clamov.c:7