SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_Cpaxpby.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_Cpaxpby( PBTYP_T * TYPE, char * CONJUG, Int M, Int N,
21 char * ALPHA,
22 char * A, Int IA, Int JA, Int * DESCA, char * AROC,
23 char * BETA,
24 char * B, Int IB, Int JB, Int * DESCB, char * BROC )
25#else
26void PB_Cpaxpby( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC,
27 BETA, B, IB, JB, DESCB, BROC )
28/*
29* .. Scalar Arguments ..
30*/
31 char * AROC, * BROC, * CONJUG;
32 Int IA, IB, JA, JB, M, N;
33 char * ALPHA, * BETA;
34 PBTYP_T * TYPE;
35/*
36* .. Array Arguments ..
37*/
38 Int * DESCA, * DESCB;
39 char * A, * B;
40#endif
41{
42/*
43* Purpose
44* =======
45*
46* PB_Cpaxpby adds one submatrix to another,
47*
48* sub( B ) := beta * sub( B ) + alpha * sub( A ), or,
49*
50* sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ),
51*
52* where both submatrices are distributed along one dimension; sub( A )
53* always denotes A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or 'r'
54* sub( A ) is distributed along a process row, otherwise sub( A )
55* is distributed along a process column. When sub( A ) is distributed
56* along a process row and BROC is 'R' or 'r' or sub( A ) is distributed
57* along a process column and BROC is 'C' or 'c', then sub( B ) denotes
58* B(IB:IB+M-1,JB:JB+N-1), and B(IB:IB+N-1,JB:JB+M-1) otherwise.
59*
60* Notes
61* =====
62*
63* A description vector is associated with each 2D block-cyclicly dis-
64* tributed matrix. This vector stores the information required to
65* establish the mapping between a matrix entry and its corresponding
66* process and memory location.
67*
68* In the following comments, the character _ should be read as
69* "of the distributed matrix". Let A be a generic term for any 2D
70* block cyclicly distributed matrix. Its description vector is DESC_A:
71*
72* NOTATION STORED IN EXPLANATION
73* ---------------- --------------- ------------------------------------
74* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
75* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
76* the NPROW x NPCOL BLACS process grid
77* A is distributed over. The context
78* itself is global, but the handle
79* (the integer value) may vary.
80* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
81* ted matrix A, M_A >= 0.
82* N_A (global) DESCA[ N_ ] The number of columns in the distri-
83* buted matrix A, N_A >= 0.
84* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
85* block of the matrix A, IMB_A > 0.
86* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
87* left block of the matrix A,
88* INB_A > 0.
89* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
90* bute the last M_A-IMB_A rows of A,
91* MB_A > 0.
92* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
93* bute the last N_A-INB_A columns of
94* A, NB_A > 0.
95* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
96* row of the matrix A is distributed,
97* NPROW > RSRC_A >= 0.
98* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
99* first column of A is distributed.
100* NPCOL > CSRC_A >= 0.
101* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
102* array storing the local blocks of
103* the distributed matrix A,
104* IF( Lc( 1, N_A ) > 0 )
105* LLD_A >= MAX( 1, Lr( 1, M_A ) )
106* ELSE
107* LLD_A >= 1.
108*
109* Let K be the number of rows of a matrix A starting at the global in-
110* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
111* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
112* receive if these K rows were distributed over NPROW processes. If K
113* is the number of columns of a matrix A starting at the global index
114* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
115* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
116* these K columns were distributed over NPCOL processes.
117*
118* The values of Lr() and Lc() may be determined via a call to the func-
119* tion PB_Cnumroc:
120* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
121* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
122*
123* Arguments
124* =========
125*
126* TYPE (local input) pointer to a PBTYP_T structure
127* On entry, TYPE is a pointer to a structure of type PBTYP_T,
128* that contains type information (See pblas.h).
129*
130* CONJUG (global input) pointer to CHAR
131* On entry, CONJUG specifies whether conjg( sub( A ) ) or
132* sub( A ) should be added to sub( B ) as follows:
133* CONJUG = 'N' or 'n':
134* sub( B ) := beta*sub( B ) + alpha*sub( A ),
135* otherwise
136* sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ).
137*
138* M (global input) INTEGER
139* On entry, M specifies the number of rows of the submatrix
140* sub( A ). M must be at least zero.
141*
142* N (global input) INTEGER
143* On entry, N specifies the number of columns of the submatrix
144* sub( A ). N must be at least zero.
145*
146* ALPHA (global input) pointer to CHAR
147* On entry, ALPHA specifies the scalar alpha. When ALPHA is
148* supplied as zero then the local entries of the array A cor-
149* responding to the entries of the submatrix sub( A ) need not
150* be set on input.
151*
152* A (local input) pointer to CHAR
153* On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
154* is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least
155* Lc( 1, JA+N-1 ). Before entry, this array contains the local
156* entries of the matrix A.
157*
158* IA (global input) INTEGER
159* On entry, IA specifies A's global row index, which points to
160* the beginning of the submatrix sub( A ).
161*
162* JA (global input) INTEGER
163* On entry, JA specifies A's global column index, which points
164* to the beginning of the submatrix sub( A ).
165*
166* DESCA (global and local input) INTEGER array
167* On entry, DESCA is an integer array of dimension DLEN_. This
168* is the array descriptor for the matrix A.
169*
170* AROC (global input) pointer to CHAR
171* On entry, AROC specifies the orientation of the subvector
172* sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector,
173* and a column vector otherwise.
174*
175* BETA (global input) pointer to CHAR
176* On entry, BETA specifies the scalar beta. When BETA is sup-
177* plied as zero then the local entries of the array B corres-
178* ponding to the entries of the submatrix sub( B ) need not be
179* set on input.
180*
181* B (local input/local output) pointer to CHAR
182* On entry, B is an array of dimension (LLD_B, Kb), where LLD_B
183* is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and
184* sub( B ) are both distributed along a process column or a
185* process row. In that case, Kb is at least Lc( 1, JB+N-1 ).
186* Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and
187* Kb is at least Lc( 1, JB+M-1 ). Before entry, this array
188* contains the local entries of the matrix B. On exit, sub( B )
189* is overwritten with the updated submatrix.
190*
191* IB (global input) INTEGER
192* On entry, IB specifies B's global row index, which points to
193* the beginning of the submatrix sub( B ).
194*
195* JB (global input) INTEGER
196* On entry, JB specifies B's global column index, which points
197* to the beginning of the submatrix sub( B ).
198*
199* DESCB (global and local input) INTEGER array
200* On entry, DESCB is an integer array of dimension DLEN_. This
201* is the array descriptor for the matrix B.
202*
203* BROC (global input) pointer to CHAR
204* On entry, BROC specifies the orientation of the subvector
205* sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector,
206* and a column vector otherwise.
207*
208* -- Written on April 1, 1998 by
209* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
210*
211* ---------------------------------------------------------------------
212*/
213/*
214* .. Local Scalars ..
215*/
216 char ascope, bscope, * buf = NULL, * one, * top, tran, * zero;
217 Int Acol, Aii, AinbD, Ainb1D, AisD, AisR, AisRow, AiD, Ajj, Ald,
218 AmyprocD, AmyprocR, AnbD, AnD, AnR, AnpD, AnprocsD, AnprocsR,
219 AprocD, AprocR, Aroc, Arow, Bcol, Bii, BinbD, Binb1D, BisD,
220 BisR, BisRow, BiD, Bjj, Bld, BmyprocD, BmyprocR, BnbD, BnD,
221 BnR, BnpD, BnprocsD, BnprocsR, BprocD, BprocR, Broc, Brow,
222 BsrcD, OneBlock, OneDgrid, RRorCC, Square, cdst, csrc, ctxt,
223 dst, gcdPQ, k, l, lcmPQ, lcmb, ma, mb, mycol, myrow, na, nb,
224 npcol, npq, nprow, p, q, rdst, rsrc, size, src;
225 PB_VM_T VM;
226 MMADD_T add;
227/* ..
228* .. Executable Statements ..
229*
230*/
231/*
232* Quick return if possible
233*/
234 if( ( M <= 0 ) || ( N <= 0 ) ) return;
235/*
236* Retrieve process grid information
237*/
238 Cblacs_gridinfo( ( ctxt = DESCA[ CTXT_ ] ), &nprow, &npcol, &myrow, &mycol );
239/*
240* Determine if sub( A ) is distributed or not
241*/
242 if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 )
243 AisD = ( ( DESCA[CSRC_] >= 0 ) && ( ( AnprocsD = npcol ) > 1 ) );
244 else
245 AisD = ( ( DESCA[RSRC_] >= 0 ) && ( ( AnprocsD = nprow ) > 1 ) );
246/*
247* Determine if sub( B ) is distributed or not
248*/
249 if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 )
250 BisD = ( ( DESCB[CSRC_] >= 0 ) && ( ( BnprocsD = npcol ) > 1 ) );
251 else
252 BisD = ( ( DESCB[RSRC_] >= 0 ) && ( ( BnprocsD = nprow ) > 1 ) );
253/*
254* AisD && BisD <=> both operands are indeed distributed
255*/
256 if( AisD && BisD )
257 {
258/*
259* Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
260*/
261 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
262 &Acol );
263 if( AisRow )
264 {
265 AinbD = DESCA[INB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
266 AiD = JA; AnD = N; AnR = M;
267 AprocD = Acol; AmyprocD = mycol;
268 AprocR = Arow; AmyprocR = myrow; AnprocsR = nprow;
269 AisR = ( ( DESCA[ RSRC_ ] == -1 ) || ( AnprocsR == 1 ) );
270 }
271 else
272 {
273 AinbD = DESCA[IMB_]; AnbD = DESCA[MB_]; Ald = DESCA[LLD_];
274 AiD = IA; AnD = M; AnR = N;
275 AprocD = Arow; AmyprocD = myrow;
276 AprocR = Acol; AmyprocR = mycol; AnprocsR = npcol;
277 AisR = ( ( DESCA[ CSRC_ ] == -1 ) || ( AnprocsR == 1 ) );
278 }
279 Ainb1D = PB_Cfirstnb( AnD, AiD, AinbD, AnbD );
280/*
281* Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ...
282*/
283 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow,
284 &Bcol );
285 if( BisRow )
286 {
287 BinbD = DESCB[ INB_ ]; BnbD = DESCB[ NB_ ];
288 BsrcD = DESCB[ CSRC_ ]; Bld = DESCB[ LLD_ ];
289 BiD = JB;
290 if( AisRow ) { BnD = N; BnR = M; } else { BnD = M; BnR = N; }
291 BprocD = Bcol; BmyprocD = mycol;
292 BprocR = Brow; BmyprocR = myrow; BnprocsR = nprow;
293 BisR = ( ( DESCB[ RSRC_ ] == -1 ) || ( BnprocsR == 1 ) );
294 }
295 else
296 {
297 BinbD = DESCB[ IMB_ ]; BnbD = DESCB[ MB_ ];
298 BsrcD = DESCB[ RSRC_ ]; Bld = DESCB[ LLD_ ];
299 BiD = IB;
300 if( AisRow ) { BnD = N; BnR = M; } else { BnD = M; BnR = N; }
301 BprocD = Brow; BmyprocD = myrow;
302 BprocR = Bcol; BmyprocR = mycol; BnprocsR = npcol;
303 BisR = ( ( DESCB[ CSRC_ ] == -1 ) || ( BnprocsR == 1 ) );
304 }
305 Binb1D = PB_Cfirstnb( BnD, BiD, BinbD, BnbD );
306/*
307* Are sub( A ) and sub( B ) both row or column vectors ?
308*/
309 RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) );
310/*
311* Do sub( A ) and sub( B ) span more than one process ?
312*/
313 OneDgrid = ( ( AnprocsD == 1 ) && ( BnprocsD == 1 ) );
314 OneBlock = ( ( Ainb1D >= AnD ) && ( Binb1D >= BnD ) );
315/*
316* Are sub( A ) and sub( B ) distributed in the same manner ?
317*/
318 Square = ( ( Ainb1D == Binb1D ) && ( AnbD == BnbD ) &&
319 ( AnprocsD == BnprocsD ) );
320
321 if( !( AisR ) )
322 {
323/*
324* sub( A ) is distributed but not replicated
325*/
326 if( BisR )
327 {
328/*
329* If sub( A ) is not replicated, but sub( B ) is, a process row or column
330* BprocR need to be selected. It will contain the non-replicated vector to
331* add sub( A ) to.
332*/
333 if( RRorCC )
334 {
335/*
336* sub( A ) and sub( B ) are both row or column vectors
337*/
338 if( ( OneDgrid || OneBlock || Square ) && ( AprocD == BprocD ) )
339 {
340/*
341* sub( A ) and sub( B ) start in the same process row or column AprocD=BprocD.
342* Enforce a purely local operation by choosing BprocR to be equal to AprocR.
343*/
344 BprocR = AprocR;
345 }
346 else
347 {
348/*
349* Otherwise, communication has to occur, so choose the next process row or
350* column for BprocR to maximize the number of links, i.e reduce contention.
351*/
352 BprocR = MModAdd1( AprocR, AnprocsR );
353 }
354 }
355 else
356 {
357/*
358* sub( A ) and sub( B ) are distributed in orthogonal directions, what is
359* chosen for YprocR does not really matter. Select the process origin.
360*/
361 BprocR = AprocD;
362 }
363 }
364 else
365 {
366/*
367* Neither sub( A ) nor sub( B ) are replicated. If I am not in process row or
368* column AprocR and not in process row or column BprocR, then quick return.
369*/
370 if( ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) )
371 return;
372 }
373 }
374 else
375 {
376/*
377* sub( A ) is distributed and replicated (so no quick return possible)
378*/
379 if( BisR )
380 {
381/*
382* sub( B ) is distributed and replicated as well
383*/
384 if( RRorCC )
385 {
386/*
387* sub( A ) and sub( B ) are both row or column vectors
388*/
389 if( ( OneDgrid || OneBlock || Square ) && ( AprocD == BprocD ) )
390 {
391/*
392* sub( A ) and sub( B ) start in the same process row or column AprocD=BprocD.
393* Enforce a purely local operation by choosing AprocR and BprocR to be equal
394* to zero.
395*/
396 AprocR = BprocR = 0;
397 }
398 else
399 {
400/*
401* Otherwise, communication has to occur, so select BprocR to be zero and the
402* next process row or column for AprocR in order to maximize the number of
403* used links, i.e reduce contention.
404*/
405 BprocR = 0;
406 AprocR = MModAdd1( BprocR, BnprocsR );
407 }
408 }
409 else
410 {
411/*
412* sub( A ) and sub( B ) are distributed in orthogonal directions, select the
413* origin processes.
414*/
415 AprocR = BprocD;
416 BprocR = AprocD;
417 }
418 }
419 else
420 {
421/*
422* sub( B ) is distributed, but not replicated
423*/
424 if( RRorCC )
425 {
426/*
427* sub( A ) and sub( B ) are both row or column vectors
428*/
429 if( ( OneDgrid || OneBlock || Square ) && ( AprocD == BprocD ) )
430 {
431/*
432* sub( A ) and sub( B ) start in the same process row or column AprocD=BprocD.
433* Enforce a purely local operation by choosing AprocR to be equal to BprocR.
434*/
435 AprocR = BprocR;
436 if( ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) ) return;
437 }
438 else
439 {
440/*
441* Otherwise, communication has to occur, so choose the next process row or
442* column for AprocR to maximize the number of links, i.e reduce contention.
443*/
444 AprocR = MModAdd1( BprocR, BnprocsR );
445 }
446 }
447 else
448 {
449/*
450* sub( A ) and sub( B ) are distributed in orthogonal directions, what is
451* chosen for AprocR does not really matter. Select the origin process.
452*/
453 AprocR = BprocD;
454 if( ( OneDgrid || OneBlock || Square ) &&
455 ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) ) return;
456 }
457 }
458 }
459/*
460* Even if sub( A ) and/or sub( B ) are replicated, only two process row or
461* column are active, namely AprocR and BprocR. If any of those operands is
462* replicated, broadcast will occur (unless there is an easy way out).
463*/
464 size = TYPE->size;
465/*
466* A purely local operation occurs iff the operands start in the same process
467* and, if either the grid is mono-dimensional or there is a single local block
468* to be added or if both operands are aligned.
469*/
470 if( ( ( RRorCC && ( AprocD == BprocD ) &&
471 ( AisR || BisR || ( AprocR == BprocR ) ) ) ||
472 ( !( RRorCC ) && ( BisR || ( AprocD == BprocR ) ) &&
473 ( AisR || ( AprocR == BprocD ) ) ) ) &&
474 ( OneDgrid || OneBlock || ( RRorCC && Square ) ) )
475 {
476 if( ( !AisR && ( AmyprocR == AprocR ) ) ||
477 ( AisR && ( BisR || BmyprocR == BprocR ) ) )
478 {
479 AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD,
480 AnprocsD );
481 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD,
482 BnprocsD );
483 if( ( AnpD > 0 ) && ( BnpD > 0 ) )
484 {
485/*
486* Select the local add routine accordingly to RRorCC
487*/
488 if( RRorCC )
489 {
490 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
491 else add = TYPE->Fmmadd;
492 }
493 else
494 {
495 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
496 else add = TYPE->Fmmtadd;
497 }
498/*
499* Local addition
500*/
501 if( AisRow )
502 add( &AnR, &AnpD, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald,
503 BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
504 else
505 add( &AnpD, &AnR, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald,
506 BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
507 }
508 }
509 if( RRorCC && AisR && BisR ) return;
510 }
511 else if( ( RRorCC && OneDgrid ) || OneBlock || Square )
512 {
513/*
514* Otherwise, it may be possible to add the distributed vectors in a single
515* message exchange iff the grid is mono-dimensional and the operands are
516* distributed in the same direction, or there is just one block to be exchanged
517* or if both operands are similarly distributed in their respective direction.
518*/
519 if( RRorCC )
520 {
521 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
522 else add = TYPE->Fmmadd;
523 }
524 else
525 {
526 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
527 else add = TYPE->Fmmtadd;
528 }
529
530 if( ( AisR && BisR ) || ( AmyprocR == AprocR ) )
531 {
532
533 AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD,
534 AnprocsD );
535 if( AnpD > 0 )
536 {
537 dst = BprocD + MModSub( AmyprocD, AprocD, AnprocsD );
538 dst = MPosMod( dst, BnprocsD );
539 if( AisRow ) { ma = AnR; na = AnpD; }
540 else { ma = AnpD; na = AnR; }
541 if( !( AisR && BisR ) )
542 {
543 if( BisRow ) { rdst = BprocR; cdst = dst; }
544 else { rdst = dst; cdst = BprocR; }
545 }
546 else
547 {
548 if( BisRow )
549 {
550 if( !AisRow ) { rdst = AmyprocR; }
551 else { rdst = MModAdd1( BmyprocR, BnprocsR ); }
552 cdst = dst;
553 }
554 else
555 {
556 rdst = dst;
557 if( AisRow ) { cdst = AmyprocR; }
558 else { cdst = MModAdd1( BmyprocR, BnprocsR ); }
559 }
560 }
561 if( ( myrow == rdst ) && ( mycol == cdst ) )
562 {
563 add( &ma, &na, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald,
564 BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
565 }
566 else
567 {
568 TYPE->Cgesd2d( ctxt, ma, na, Mptr( A, Aii, Ajj, Ald, size ),
569 Ald, rdst, cdst );
570 }
571 }
572 }
573
574 if( ( AisR && BisR ) || ( BmyprocR == BprocR ) )
575 {
576 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD,
577 BnprocsD );
578 if( BnpD > 0 )
579 {
580 src = AprocD + MModSub( BmyprocD, BprocD, BnprocsD );
581 src = MPosMod( src, AnprocsD );
582 if( AisRow ) { ma = BnR; na = BnpD; }
583 else { ma = BnpD; na = BnR; }
584 if( !( AisR && BisR ) )
585 {
586 if( AisRow ) { rsrc = AprocR; csrc = src; }
587 else { rsrc = src; csrc = AprocR; }
588 }
589 else
590 {
591 if( AisRow )
592 {
593 if( !BisRow ) { rsrc = BmyprocR; }
594 else { rsrc = MModSub1( AmyprocR, AnprocsR ); }
595 csrc = src;
596 }
597 else
598 {
599 rsrc = src;
600 if( BisRow ) { csrc = BmyprocR; }
601 else { csrc = MModSub1( AmyprocR, AnprocsR ); }
602 }
603 }
604 if( ( myrow != rsrc ) || ( mycol != csrc ) )
605 {
606 buf = PB_Cmalloc( BnpD * BnR * size );
607 TYPE->Cgerv2d( ctxt, ma, na, buf, ma, rsrc, csrc );
608 add( &ma, &na, ALPHA, buf, &ma, BETA, Mptr( B, Bii, Bjj, Bld,
609 size ), &Bld );
610 if( buf ) free( buf );
611 }
612 }
613 }
614 if( AisR && BisR ) return;
615 }
616 else
617 {
618/*
619* General case
620*/
621 if( RRorCC )
622 {
623 if( Mupcase( CONJUG[0] ) != CNOCONJG ) tran = CCONJG;
624 else tran = CNOTRAN;
625 }
626 else
627 {
628 if( Mupcase( CONJUG[0] ) != CNOCONJG ) tran = CCOTRAN;
629 else tran = CTRAN;
630 }
631
632 if( AisRow ) { ascope = CCOLUMN; ma = AnR; }
633 else { ascope = CROW; na = AnR; }
634 bscope = ( BisRow ? CCOLUMN : CROW );
635 lcmb = PB_Clcm( AnprocsD * AnbD, BnprocsD * BnbD );
636 one = TYPE->one; zero = TYPE->zero;
637 gcdPQ = PB_Cgcd( AnprocsD, BnprocsD );
638 lcmPQ = ( AnprocsD / gcdPQ ) * BnprocsD;
639
640 for( k = 0; k < gcdPQ; k++ )
641 {
642 p = 0; q = k;
643
644 for( l = 0; l < lcmPQ; l++ )
645 {
646 Aroc = MModAdd( AprocD, p, AnprocsD );
647 Broc = MModAdd( BprocD, q, BnprocsD );
648
649 if( ( AmyprocD == Aroc ) || ( BmyprocD == Broc ) )
650 {
651 AnpD = PB_Cnumroc( AnD, 0, Ainb1D, AnbD, Aroc, AprocD,
652 AnprocsD );
653 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, Broc, BprocD,
654 BnprocsD );
655 PB_CVMinit( &VM, 0, AnpD, BnpD, Ainb1D, Binb1D, AnbD, BnbD,
656 p, q, AnprocsD, BnprocsD, lcmb );
657 if( npq = PB_CVMnpq( &VM ) )
658 {
659 if( ( RRorCC && ( Aroc == Broc ) &&
660 ( AisR || ( AprocR == BprocR ) ) ) ||
661 ( !( RRorCC ) && ( Aroc == BprocR ) &&
662 ( AisR || ( AprocR == Broc ) ) ) )
663 {
664 if( ( BmyprocD == Broc ) && ( BmyprocR == BprocR ) )
665 {
666 PB_CVMloc( TYPE, &VM, ROW, &ascope, PACKING, &tran,
667 npq, AnR, ALPHA, Mptr( A, Aii, Ajj, Ald,
668 size ), Ald, BETA, Mptr( B, Bii, Bjj, Bld,
669 size ), Bld );
670 }
671 }
672 else
673 {
674 if( ( AmyprocR == AprocR ) && ( AmyprocD == Aroc ) )
675 {
676 if( AisRow ) { na = npq; } else { ma = npq; }
677 buf = PB_Cmalloc( ma * na * size );
678 PB_CVMpack( TYPE, &VM, ROW, &ascope, PACKING, NOTRAN,
679 npq, AnR, one, Mptr( A, Aii, Ajj, Ald,
680 size ), Ald, zero, buf, ma );
681 if( BisRow ) { rdst = BprocR; cdst = Broc; }
682 else { rdst = Broc; cdst = BprocR; }
683 TYPE->Cgesd2d( ctxt, ma, na, buf, ma, rdst, cdst );
684 if( buf ) free ( buf );
685 }
686 if( ( BmyprocR == BprocR ) && ( BmyprocD == Broc ) )
687 {
688 if( AisRow )
689 { na = npq; rsrc = AprocR; csrc = Aroc; }
690 else
691 { ma = npq; rsrc = Aroc; csrc = AprocR; }
692 buf = PB_Cmalloc( ma * na * size );
693 TYPE->Cgerv2d( ctxt, ma, na, buf, ma, rsrc, csrc );
694 PB_CVMpack( TYPE, &VM, COLUMN, &bscope, UNPACKING,
695 &tran, npq, AnR, BETA, Mptr( B, Bii, Bjj,
696 Bld, size ), Bld, ALPHA, buf, ma );
697 if( buf ) free ( buf );
698 }
699 }
700 }
701 }
702 p = MModAdd1( p, AnprocsD );
703 q = MModAdd1( q, BnprocsD );
704 }
705 if( AisR ) AprocR = MModAdd1( AprocR, AnprocsR );
706 }
707 }
708
709 if( BisR )
710 {
711/*
712* Replicate sub( B )
713*/
714 BnpD = PB_Cnumroc( BnD, BiD, BinbD, BnbD, BmyprocD, BsrcD, BnprocsD );
715 if( BnpD > 0 )
716 {
717 if( BisRow )
718 {
719 bscope = CCOLUMN; mb = BnR; nb = BnpD;
720 rsrc = BprocR; csrc = BmyprocD;
721 }
722 else
723 {
724 bscope = CROW; mb = BnpD; nb = BnR;
725 rsrc = BmyprocD; csrc = BprocR;
726 }
727 top = PB_Ctop( &ctxt, BCAST, &bscope, TOP_GET );
728 if( BmyprocR == BprocR )
729 {
730 TYPE->Cgebs2d( ctxt, &bscope, top, mb, nb, Mptr( B, Bii, Bjj,
731 Bld, size ), Bld );
732 }
733 else
734 {
735 TYPE->Cgebr2d( ctxt, &bscope, top, mb, nb, Mptr( B, Bii, Bjj,
736 Bld, size ), Bld, rsrc, csrc );
737 }
738 }
739 }
740 }
741 else if( !( AisD ) && BisD )
742 {
743/*
744* sub( A ) is not distributed and sub( B ) is distributed.
745*/
746 PB_CpaxpbyND( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B,
747 IB, JB, DESCB, BROC );
748 }
749 else if( AisD && !( BisD ) )
750 {
751/*
752* sub( A ) is distributed and sub( B ) is not distributed.
753*/
754 PB_CpaxpbyDN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B,
755 IB, JB, DESCB, BROC );
756 }
757 else
758 {
759/*
760* Neither sub( A ) nor sub( B ) are distributed.
761*/
762 PB_CpaxpbyNN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC, BETA, B,
763 IB, JB, DESCB, BROC );
764 }
765/*
766* End of PB_Cpaxpby
767*/
768}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define NOTRAN
Definition PBblas.h:44
#define CCONJG
Definition PBblas.h:21
#define CNOCONJG
Definition PBblas.h:19
#define CNOTRAN
Definition PBblas.h:18
#define CTRAN
Definition PBblas.h:20
#define CCOTRAN
Definition PBblas.h:22
#define CTXT_
Definition PBtools.h:38
#define UNPACKING
Definition PBtools.h:54
void PB_CVMinit()
Int PB_Cfirstnb()
#define MB_
Definition PBtools.h:43
void PB_CpaxpbyDN()
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
#define PACKING
Definition PBtools.h:53
void PB_CpaxpbyND()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
Int PB_CVMpack()
char * PB_Ctop()
Int PB_Cgcd()
#define RSRC_
Definition PBtools.h:45
#define MModAdd1(I, d)
Definition PBtools.h:100
#define MModAdd(I1, I2, d)
Definition PBtools.h:97
#define INB_
Definition PBtools.h:42
#define MPosMod(I, d)
Definition PBtools.h:95
Int PB_CVMnpq()
#define MModSub1(I, d)
Definition PBtools.h:105
#define CSRC_
Definition PBtools.h:46
Int PB_Clcm()
#define IMB_
Definition PBtools.h:41
#define Mupcase(C)
Definition PBtools.h:83
#define NB_
Definition PBtools.h:44
Int PB_CVMloc()
void PB_CpaxpbyNN()
void PB_Cpaxpby()
#define TYPE
Definition clamov.c:7