SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cpgeadd.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cpgeadd( PBTYP_T * TYPE, char * DIRECA, char * DIRECC,
21 char * CONJUG, Int M, Int N, char * ALPHA, char * A,
22 Int IA, Int JA, Int * DESCA, char * BETA, char * C,
23 Int IC, Int JC, Int * DESCC )
24#else
25void PB_Cpgeadd( TYPE, DIRECA, DIRECC, CONJUG, M, N, ALPHA, A, IA, JA,
26 DESCA, BETA, C, IC, JC, DESCC )
27/*
28* .. Scalar Arguments ..
29*/
30 char * CONJUG, * DIRECA, * DIRECC;
31 Int IA, IC, JA, JC, M, N;
32 char * ALPHA, * BETA;
33 PBTYP_T * TYPE;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCA, * DESCC;
38 char * A, * C;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PB_Cpgeadd adds a matrix to another
46*
47* sub( C ) := beta*sub( C ) + alpha*op( sub( A ) )
48*
49* where
50*
51* sub( C ) denotes C(IC:IC+M-1,JC:JC+N-1), and, op( X ) is one of
52*
53* op( X ) = X or op( X ) = conjg( X ).
54*
55* Thus, op( sub( A ) ) denotes A(IA:IA+M-1,JA:JA+N-1) if CONJUG = 'N',
56* conjg(A(IA:IA+N-1,JA:JA+M-1)) if CONJUG = 'C'.
57*
58* Alpha and beta are scalars, sub( C ) and op( sub( A ) ) are m by n
59* submatrices.
60*
61* Notes
62* =====
63*
64* A description vector is associated with each 2D block-cyclicly dis-
65* tributed matrix. This vector stores the information required to
66* establish the mapping between a matrix entry and its corresponding
67* process and memory location.
68*
69* In the following comments, the character _ should be read as
70* "of the distributed matrix". Let A be a generic term for any 2D
71* block cyclicly distributed matrix. Its description vector is DESC_A:
72*
73* NOTATION STORED IN EXPLANATION
74* ---------------- --------------- ------------------------------------
75* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
76* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
77* the NPROW x NPCOL BLACS process grid
78* A is distributed over. The context
79* itself is global, but the handle
80* (the integer value) may vary.
81* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
82* ted matrix A, M_A >= 0.
83* N_A (global) DESCA[ N_ ] The number of columns in the distri-
84* buted matrix A, N_A >= 0.
85* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
86* block of the matrix A, IMB_A > 0.
87* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
88* left block of the matrix A,
89* INB_A > 0.
90* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
91* bute the last M_A-IMB_A rows of A,
92* MB_A > 0.
93* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
94* bute the last N_A-INB_A columns of
95* A, NB_A > 0.
96* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
97* row of the matrix A is distributed,
98* NPROW > RSRC_A >= 0.
99* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
100* first column of A is distributed.
101* NPCOL > CSRC_A >= 0.
102* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
103* array storing the local blocks of
104* the distributed matrix A,
105* IF( Lc( 1, N_A ) > 0 )
106* LLD_A >= MAX( 1, Lr( 1, M_A ) )
107* ELSE
108* LLD_A >= 1.
109*
110* Let K be the number of rows of a matrix A starting at the global in-
111* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
112* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
113* receive if these K rows were distributed over NPROW processes. If K
114* is the number of columns of a matrix A starting at the global index
115* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
116* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
117* these K columns were distributed over NPCOL processes.
118*
119* The values of Lr() and Lc() may be determined via a call to the func-
120* tion PB_Cnumroc:
121* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
122* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
123*
124* Arguments
125* =========
126*
127* TYPE (local input) pointer to a PBTYP_T structure
128* On entry, TYPE is a pointer to a structure of type PBTYP_T,
129* that contains type information (See pblas.h).
130*
131* DIRECA (global input) pointer to CHAR
132* On entry, DIRECA specifies the direction in which the rows
133* or columns of sub( A ) should be looped over as follows:
134* DIRECA = 'F' or 'f' forward or increasing,
135* DIRECA = 'B' or 'b' backward or decreasing.
136*
137* DIRECC (global input) pointer to CHAR
138* On entry, DIRECC specifies the direction in which the rows
139* or columns of sub( C ) should be looped over as follows:
140* DIRECC = 'F' or 'f' forward or increasing,
141* DIRECC = 'B' or 'b' backward or decreasing.
142*
143* CONJUG (global input) pointer to CHAR
144* On entry, CONJUG specifies whether conjg( sub( A ) ) or
145* sub( A ) should be added to sub( C ) as follows:
146* CONJUG = 'N' or 'n':
147* sub( C ) := beta*sub( C ) + alpha*sub( A )'
148* otherwise
149* sub( C ) := beta*sub( C ) + alpha*conjg( sub( A ) )'.
150*
151* M (global input) INTEGER
152* On entry, M specifies the number of rows of the submatrices
153* sub( A ) and sub( C ). M must be at least zero.
154*
155* N (global input) INTEGER
156* On entry, N specifies the number of columns of the submatri-
157* ces sub( A ) and sub( C ). N must be at least zero.
158*
159* ALPHA (global input) pointer to CHAR
160* On entry, ALPHA specifies the scalar alpha. When ALPHA is
161* supplied as zero then the local entries of the array A
162* corresponding to the entries of the submatrix sub( A ) need
163* not be set on input.
164*
165* A (local input) pointer to CHAR
166* On entry, A is an array of dimension (LLD_A, Ka), where Ka is
167* at least Lc( 1, JA+N-1 ). Before entry, this array contains
168* the local entries of the matrix A.
169*
170* IA (global input) INTEGER
171* On entry, IA specifies A's global row index, which points to
172* the beginning of the submatrix sub( A ).
173*
174* JA (global input) INTEGER
175* On entry, JA specifies A's global column index, which points
176* to the beginning of the submatrix sub( A ).
177*
178* DESCA (global and local input) INTEGER array
179* On entry, DESCA is an integer array of dimension DLEN_. This
180* is the array descriptor for the matrix A.
181*
182* BETA (global input) pointer to CHAR
183* On entry, BETA specifies the scalar beta. When BETA is
184* supplied as zero then the local entries of the array C
185* corresponding to the entries of the submatrix sub( C ) need
186* not be set on input.
187*
188* C (local input/local output) pointer to CHAR
189* On entry, C is an array of dimension (LLD_C, Kc), where Kc is
190* at least Lc( 1, JC+N-1 ). Before entry, this array contains
191* the local entries of the matrix C.
192* On exit, the entries of this array corresponding to the local
193* entries of the submatrix sub( C ) are overwritten by the
194* local entries of the m by n updated submatrix.
195*
196* IC (global input) INTEGER
197* On entry, IC specifies C's global row index, which points to
198* the beginning of the submatrix sub( C ).
199*
200* JC (global input) INTEGER
201* On entry, JC specifies C's global column index, which points
202* to the beginning of the submatrix sub( C ).
203*
204* DESCC (global and local input) INTEGER array
205* On entry, DESCC is an integer array of dimension DLEN_. This
206* is the array descriptor for the matrix C.
207*
208* -- Written on April 1, 1998 by
209* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
210*
211* ---------------------------------------------------------------------
212*/
213/*
214* .. Local Scalars ..
215*/
216 char ACroc, * one, * talpha, * tbeta, * zero;
217 Int ACmyprocD, ACmyprocR, ACnD, ACnR, ACnprocsD, ACnprocsR,
218 Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD,
219 AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AnbD, AnbR, AnpD,
220 AnpR, Aoff, ArocD, ArocR, AsrcR, Cbufld, CcurrocR, Cfr,
221 Cfwd, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R,
222 CisR, Ckk, Cld, CnbD, CnbR, CnpD, CnpR, Coff, CrocD, CrocR,
223 CsrcR, ctxt, k, kb, kbb, lcmb, maxp, maxpm1, maxpq, maxq,
224 mycol, myrow, npcol, npq, nprow, ncpq, nrpq, p=0, q=0,
225 row2row, size, tmp;
226 PB_VM_T VM;
227/*
228* .. Local Arrays ..
229*/
230 Int DBUFA[DLEN_], DBUFC[DLEN_];
231 char * Abuf = NULL, * Cbuf = NULL;
232/* ..
233* .. Executable Statements ..
234*
235*/
236/*
237* Retrieve process grid information
238*/
239 Cblacs_gridinfo( ( ctxt = DESCC[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
240/*
241* Loop over the rows of sub( C ) when M <= N, and the columns of sub( C )
242* otherwise.
243*/
244 row2row = ( ( M <= N ) || ( npcol == 1 ) || ( DESCA[CSRC_] == -1 ) );
245
246 if( row2row )
247 {
248 AinbR = DESCA[IMB_]; AnbR = DESCA[MB_]; AsrcR = DESCA[RSRC_];
249 CinbR = DESCC[IMB_]; CnbR = DESCC[MB_]; CsrcR = DESCC[RSRC_];
250/*
251* If sub( A ) and sub( C ) span only one process row, then there is no need
252* to pack the data.
253*/
254 if( !( PB_Cspan( M, IA, AinbR, AnbR, AsrcR, nprow ) ) &&
255 !( PB_Cspan( M, IC, CinbR, CnbR, CsrcR, nprow ) ) )
256 {
257 PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, ROW, BETA,
258 C, IC, JC, DESCC, ROW );
259 return;
260 }
261/*
262* Compute local information for sub( A ) and sub( C )
263*/
264 ACnR = M; ACnD = N;
265 ACmyprocR = myrow; ACnprocsR = nprow;
266 ACmyprocD = mycol; ACnprocsD = npcol; ACroc = CROW;
267 AiR = IA; AiD = JA;
268 AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
269 PB_Cinfog2l( IA, JA, DESCA, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD,
270 &AiiR, &AiiD, &ArocR, &ArocD );
271 CiR = IC; CiD = JC;
272 CinbD = DESCC[INB_]; CnbD = DESCC[NB_]; Cld = DESCC[LLD_];
273 PB_Cinfog2l( IC, JC, DESCC, ACnprocsR, ACnprocsD, ACmyprocR, ACmyprocD,
274 &CiiR, &CiiD, &CrocR, &CrocD );
275 }
276 else
277 {
278 AinbR = DESCA[INB_]; AnbR = DESCA[NB_]; AsrcR = DESCA[CSRC_];
279 CinbR = DESCC[INB_]; CnbR = DESCC[NB_]; CsrcR = DESCC[CSRC_];
280/*
281* If sub( A ) and sub( C ) span only one process column, then there is no need
282* to pack the data.
283*/
284 if( !( PB_Cspan( N, JA, AinbR, AnbR, AsrcR, npcol ) ) &&
285 !( PB_Cspan( N, JC, CinbR, CnbR, CsrcR, npcol ) ) )
286 {
287 PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, COLUMN, BETA,
288 C, IC, JC, DESCC, COLUMN );
289 return;
290 }
291/*
292* Compute local information for sub( A ) and sub( C )
293*/
294 ACnR = N; ACnD = M;
295 ACmyprocR = mycol; ACnprocsR = npcol;
296 ACmyprocD = myrow; ACnprocsD = nprow; ACroc = CCOLUMN;
297 AiR = JA; AiD = IA;
298 AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_];
299 PB_Cinfog2l( IA, JA, DESCA, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR,
300 &AiiD, &AiiR, &ArocD, &ArocR );
301 CiR = JC; CiD = IC;
302 CinbD = DESCC[IMB_]; CnbD = DESCC[MB_]; Cld = DESCC[LLD_];
303 PB_Cinfog2l( IC, JC, DESCC, ACnprocsD, ACnprocsR, ACmyprocD, ACmyprocR,
304 &CiiD, &CiiR, &CrocD, &CrocR );
305 }
306
307 size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
308 kb = pilaenv_( &ctxt, C2F_CHAR( &TYPE->type ) );
309
310 Ainb1D = PB_Cfirstnb( ACnD, AiD, AinbD, AnbD );
311 AnpD = PB_Cnumroc( ACnD, 0, Ainb1D, AnbD, ACmyprocD, ArocD, ACnprocsD );
312 Ainb1R = PB_Cfirstnb( ACnR, AiR, AinbR, AnbR );
313 AisR = ( ( AsrcR < 0 ) || ( ACnprocsR == 1 ) );
314
315 Cinb1D = PB_Cfirstnb( ACnD, CiD, CinbD, CnbD );
316 CnpD = PB_Cnumroc( ACnD, 0, Cinb1D, CnbD, ACmyprocD, CrocD, ACnprocsD );
317 Cinb1R = PB_Cfirstnb( ACnR, CiR, CinbR, CnbR );
318 CisR = ( ( CsrcR < 0 ) || ( ACnprocsR == 1 ) );
319
320 lcmb = PB_Clcm( ( maxp = ( CisR ? 1 : ACnprocsR ) ) * CnbR,
321 ( maxq = ( AisR ? 1 : ACnprocsR ) ) * AnbR );
322
323 Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
324 Cfwd = ( Mupcase( DIRECC[0] ) == CFORWARD );
325/*
326* When sub( A ) is not replicated and backward pass on sub( A ), find the
327* virtual process q owning the last row or column of sub( A ).
328*/
329 if( !( AisR ) && !( Afwd ) )
330 {
331 tmp = PB_Cindxg2p( ACnR-1, Ainb1R, AnbR, ArocR, ArocR, ACnprocsR );
332 q = MModSub( tmp, ArocR, ACnprocsR );
333 }
334/*
335* When sub( C ) is not replicated and backward pass on sub( C ), find the
336* virtual process p owning the last row or column of sub( C ).
337*/
338 if( !( CisR ) && !( Cfwd ) )
339 {
340 tmp = PB_Cindxg2p( ACnR-1, Cinb1R, CnbR, CrocR, CrocR, ACnprocsR );
341 p = MModSub( tmp, CrocR, ACnprocsR );
342 }
343/*
344* Loop over the processes of the virtual grid
345*/
346 maxpm1 = maxp - 1; maxpq = maxp * maxq;
347
348 for( k = 0; k < maxpq; k++ )
349 {
350 AcurrocR = ( AisR ? -1 : MModAdd( ArocR, q, ACnprocsR ) );
351 CcurrocR = ( CisR ? -1 : MModAdd( CrocR, p, ACnprocsR ) );
352
353 if( ( AisR || ( ACmyprocR == AcurrocR ) ) ||
354 ( CisR || ( ACmyprocR == CcurrocR ) ) )
355 {
356 Ckk = CiiR; Akk = AiiR;
357/*
358* Initialize local virtual matrix in process (p,q)
359*/
360 AnpR = PB_Cnumroc( ACnR, 0, Ainb1R, AnbR, AcurrocR, ArocR, ACnprocsR );
361 CnpR = PB_Cnumroc( ACnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, ACnprocsR );
362 PB_CVMinit( &VM, 0, CnpR, AnpR, Cinb1R, Ainb1R, CnbR, AnbR, p, q,
363 maxp, maxq, lcmb );
364/*
365* Figure out how many diagonal entries in this new virtual process (npq).
366*/
367 npq = PB_CVMnpq( &VM );
368/*
369* Re-adjust the number of rows or columns to be (un)packed, in order to average
370* the message sizes.
371*/
372 if( npq ) kbb = npq / ( ( npq - 1 ) / kb + 1 );
373
374 if( row2row )
375 {
376 while( npq )
377 {
378 kbb = MIN( kbb, npq );
379/*
380* Find out how many rows of sub( A ) and sub( C ) are contiguous
381*/
382 PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff );
383/*
384* Compute the descriptor DBUFA for the buffer that will contained the packed
385* rows of sub( A ).
386*/
387 if( ( Afr = ( ncpq < kbb ) ) != 0 )
388 {
389/*
390* If rows of sub( A ) are not contiguous, then allocate the buffer and pack
391* the kbb rows of sub( A ).
392*/
393 Abufld = kbb;
394 if( AisR || ( ACmyprocR == AcurrocR ) )
395 {
396 Abuf = PB_Cmalloc( AnpD * kbb * size );
397 PB_CVMpack( TYPE, &VM, COLUMN, &ACroc, PACKING, NOTRAN,
398 kbb, AnpD, one, Mptr( A, Akk, AiiD, Ald,
399 size ), Ald, zero, Abuf, Abufld );
400 }
401 }
402 else
403 {
404/*
405* Otherwise, re-use sub( A ) directly.
406*/
407 Abufld = Ald;
408 if( AisR || ( ACmyprocR == AcurrocR ) )
409 Abuf = Mptr( A, Akk+Aoff, AiiD, Ald, size );
410 }
411 PB_Cdescset( DBUFA, kbb, ACnD, kbb, Ainb1D, kbb, AnbD, AcurrocR,
412 ArocD, ctxt, Abufld );
413/*
414* Compute the descriptor DBUFC for the buffer that will contained the packed
415* rows of sub( C ). Allocate it.
416*/
417 if( ( Cfr = ( nrpq < kbb ) ) != 0 )
418 {
419/*
420* If rows of sub( C ) are not contiguous, then allocate receiving buffer.
421*/
422 Cbufld = kbb; talpha = one; tbeta = zero;
423 if( CisR || ( ACmyprocR == CcurrocR ) )
424 Cbuf = PB_Cmalloc( CnpD * kbb * size );
425 }
426 else
427 {
428/*
429* Otherwise, re-use sub( C ) directly.
430*/
431 Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
432 if( CisR || ( ACmyprocR == CcurrocR ) )
433 Cbuf = Mptr( C, Ckk+Coff, CiiD, Cld, size );
434 }
435 PB_Cdescset( DBUFC, kbb, ACnD, kbb, Cinb1D, kbb, CnbD, CcurrocR,
436 CrocD, ctxt, Cbufld );
437/*
438* Add the one-dimensional buffer Abuf into Cbuf.
439*/
440 PB_Cpaxpby( TYPE, CONJUG, kbb, ACnD, talpha, Abuf, 0, 0, DBUFA,
441 &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc );
442/*
443* Release the buffer containing the packed rows of sub( A )
444*/
445 if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) )
446 if( Abuf ) free( Abuf );
447/*
448* Unpack the kbb rows of sub( C ) and release the buffer containing them.
449*/
450 if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) )
451 {
452 PB_CVMpack( TYPE, &VM, ROW, &ACroc, UNPACKING, NOTRAN, kbb,
453 CnpD, BETA, Mptr( C, Ckk, CiiD, Cld, size ), Cld,
454 ALPHA, Cbuf, Cbufld );
455 if( Cbuf ) free( Cbuf );
456 }
457/*
458* Update the local row indexes of sub( A ) and sub( C )
459*/
460 PB_CVMupdate( &VM, kbb, &Ckk, &Akk );
461 npq -= kbb;
462 }
463 }
464 else
465 {
466 while( npq )
467 {
468 kbb = MIN( kbb, npq );
469/*
470* Find out how many columns of sub( A ) and sub( C ) are contiguous
471*/
472 PB_CVMcontig( &VM, &nrpq, &ncpq, &Coff, &Aoff );
473/*
474* Compute the descriptor DBUFA for the buffer that will contained the packed
475* columns of sub( A ).
476*/
477 if( ( Afr = ( ncpq < kbb ) ) != 0 )
478 {
479/*
480* If columns of sub( A ) are not contiguous, then allocate the buffer and
481* pack the kbb columns of sub( A ).
482*/
483 Abufld = MAX( 1, AnpD );
484 if( AisR || ( ACmyprocR == AcurrocR ) )
485 {
486 Abuf = PB_Cmalloc( AnpD * kbb * size );
487 PB_CVMpack( TYPE, &VM, COLUMN, &ACroc, PACKING, NOTRAN,
488 kbb, AnpD, one, Mptr( A, AiiD, Akk, Ald,
489 size ), Ald, zero, Abuf, Abufld );
490 }
491 }
492 else
493 {
494/*
495* Otherwise, re-use sub( A ) directly.
496*/
497 Abufld = Ald;
498 if( AisR || ( ACmyprocR == AcurrocR ) )
499 Abuf = Mptr( A, AiiD, Akk+Aoff, Ald, size );
500 }
501 PB_Cdescset( DBUFA, ACnD, kbb, Ainb1D, kbb, AnbD, kbb, ArocD,
502 AcurrocR, ctxt, Abufld );
503/*
504* Compute the descriptor DBUFC for the buffer that will contained the packed
505* columns of sub( C ). Allocate it.
506*/
507 if( ( Cfr = ( nrpq < kbb ) ) != 0 )
508 {
509/*
510* If columns of sub( C ) are not contiguous, then allocate receiving buffer.
511*/
512 Cbufld = MAX( 1, CnpD ); talpha = one; tbeta = zero;
513 if( CisR || ( ACmyprocR == CcurrocR ) )
514 Cbuf = PB_Cmalloc( CnpD * kbb * size );
515 }
516 else
517 {
518 Cbufld = Cld; talpha = ALPHA; tbeta = BETA;
519 if( CisR || ( ACmyprocR == CcurrocR ) )
520 Cbuf = Mptr( C, CiiD, Ckk+Coff, Cld, size );
521 }
522 PB_Cdescset( DBUFC, ACnD, kbb, Cinb1D, kbb, CnbD, kbb, CrocD,
523 CcurrocR, ctxt, Cbufld );
524/*
525* Add the one-dimensional buffer Abuf into Cbuf.
526*/
527 PB_Cpaxpby( TYPE, CONJUG, ACnD, kbb, talpha, Abuf, 0, 0, DBUFA,
528 &ACroc, tbeta, Cbuf, 0, 0, DBUFC, &ACroc );
529/*
530* Release the buffer containing the packed columns of sub( A )
531*/
532 if( Afr && ( AisR || ( ACmyprocR == AcurrocR ) ) )
533 if( Abuf ) free( Abuf );
534/*
535* Unpack the kbb columns of sub( C ) and release the buffer containing them.
536*/
537 if( Cfr && ( CisR || ( ACmyprocR == CcurrocR ) ) )
538 {
539 PB_CVMpack( TYPE, &VM, ROW, &ACroc, UNPACKING, NOTRAN, kbb,
540 CnpD, BETA, Mptr( C, CiiD, Ckk, Cld, size ), Cld,
541 ALPHA, Cbuf, Cbufld );
542 if( Cbuf ) free( Cbuf );
543 }
544/*
545* Update the local row index of sub( A ) and the local column index of sub( C )
546*/
547 PB_CVMupdate( &VM, kbb, &Ckk, &Akk );
548 npq -= kbb;
549 }
550 }
551 }
552/*
553* Go to the next virtual process (p,q)
554*/
555 if( ( Cfwd && ( p == maxpm1 ) ) || ( !( Cfwd ) && ( p == 0 ) ) )
556 q = ( Afwd ? MModAdd1( q, maxq ) : MModSub1( q, maxq ) );
557 p = ( Cfwd ? MModAdd1( p, maxp ) : MModSub1( p, maxp ) );
558 }
559/*
560* End of PB_Cpgeadd
561*/
562}
#define Int
Definition Bconfig.h:22
#define C2F_CHAR(a)
Definition pblas.h:125
#define CCOLUMN
Definition PBblacs.h:20
#define COLUMN
Definition PBblacs.h:45
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define NOTRAN
Definition PBblas.h:44
#define CFORWARD
Definition PBblas.h:38
#define pilaenv_
Definition PBpblas.h:44
#define CTXT_
Definition PBtools.h:38
#define UNPACKING
Definition PBtools.h:54
void PB_CVMinit()
Int PB_Cfirstnb()
#define MAX(a_, b_)
Definition PBtools.h:77
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
#define PACKING
Definition PBtools.h:53
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
Int PB_CVMpack()
void PB_CVMupdate()
#define RSRC_
Definition PBtools.h:45
void PB_Cpgeadd()
void PB_Cdescset()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define MModAdd(I1, I2, d)
Definition PBtools.h:97
#define INB_
Definition PBtools.h:42
Int PB_CVMnpq()
#define MModSub1(I, d)
Definition PBtools.h:105
#define CSRC_
Definition PBtools.h:46
Int PB_Clcm()
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
void PB_CVMcontig()
void PB_Cpaxpby()
Int PB_Cspan()
#define TYPE
Definition clamov.c:7