SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CpaxpbyND.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CpaxpbyND( PBTYP_T * TYPE, char * CONJUG, Int M, Int N,
21 char * ALPHA,
22 char * A, Int IA, Int JA, Int * DESCA, char * AROC,
23 char * BETA,
24 char * B, Int IB, Int JB, Int * DESCB, char * BROC )
25#else
26void PB_CpaxpbyND( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC,
27 BETA, B, IB, JB, DESCB, BROC )
28/*
29* .. Scalar Arguments ..
30*/
31 char * AROC, * BROC, * CONJUG;
32 Int IA, IB, JA, JB, M, N;
33 char * ALPHA, * BETA;
34 PBTYP_T * TYPE;
35/*
36* .. Array Arguments ..
37*/
38 Int * DESCA, * DESCB;
39 char * A, * B;
40#endif
41{
42/*
43* Purpose
44* =======
45*
46* PB_CpaxpbyND adds one submatrix to another,
47*
48* sub( B ) := beta * sub( B ) + alpha * sub( A ), or,
49*
50* sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ),
51*
52* where sub( A ) is not distributed and sub( B ) is distributed.
53*
54* sub( A ) always denotes A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or
55* 'r' sub( A ) resides in a process row, otherwise sub( A ) resides in
56* a process column. When sub( A ) resides in a process row and BROC is
57* 'R' or 'r' or sub( A ) resides in a process column and BROC is 'C' or
58* 'c', then sub( B ) denotes B( IB:IB+M-1, JB:JB+N-1 ), and otherwise
59* sub( B ) denotes B(IB:IB+N-1,JB:JB+M-1).
60* otherwise.
61*
62* Notes
63* =====
64*
65* A description vector is associated with each 2D block-cyclicly dis-
66* tributed matrix. This vector stores the information required to
67* establish the mapping between a matrix entry and its corresponding
68* process and memory location.
69*
70* In the following comments, the character _ should be read as
71* "of the distributed matrix". Let A be a generic term for any 2D
72* block cyclicly distributed matrix. Its description vector is DESC_A:
73*
74* NOTATION STORED IN EXPLANATION
75* ---------------- --------------- ------------------------------------
76* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
77* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
78* the NPROW x NPCOL BLACS process grid
79* A is distributed over. The context
80* itself is global, but the handle
81* (the integer value) may vary.
82* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
83* ted matrix A, M_A >= 0.
84* N_A (global) DESCA[ N_ ] The number of columns in the distri-
85* buted matrix A, N_A >= 0.
86* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
87* block of the matrix A, IMB_A > 0.
88* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
89* left block of the matrix A,
90* INB_A > 0.
91* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
92* bute the last M_A-IMB_A rows of A,
93* MB_A > 0.
94* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
95* bute the last N_A-INB_A columns of
96* A, NB_A > 0.
97* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
98* row of the matrix A is distributed,
99* NPROW > RSRC_A >= 0.
100* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
101* first column of A is distributed.
102* NPCOL > CSRC_A >= 0.
103* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
104* array storing the local blocks of
105* the distributed matrix A,
106* IF( Lc( 1, N_A ) > 0 )
107* LLD_A >= MAX( 1, Lr( 1, M_A ) )
108* ELSE
109* LLD_A >= 1.
110*
111* Let K be the number of rows of a matrix A starting at the global in-
112* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
113* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
114* receive if these K rows were distributed over NPROW processes. If K
115* is the number of columns of a matrix A starting at the global index
116* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
117* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
118* these K columns were distributed over NPCOL processes.
119*
120* The values of Lr() and Lc() may be determined via a call to the func-
121* tion PB_Cnumroc:
122* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
123* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
124*
125* Arguments
126* =========
127*
128* TYPE (local input) pointer to a PBTYP_T structure
129* On entry, TYPE is a pointer to a structure of type PBTYP_T,
130* that contains type information (See pblas.h).
131*
132* CONJUG (global input) pointer to CHAR
133* On entry, CONJUG specifies whether conjg( sub( A ) ) or
134* sub( A ) should be added to sub( B ) as follows:
135* CONJUG = 'N' or 'n':
136* sub( B ) := beta*sub( B ) + alpha*sub( A ),
137* otherwise
138* sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ).
139*
140* M (global input) INTEGER
141* On entry, M specifies the number of rows of the submatrix
142* sub( A ). M must be at least zero.
143*
144* N (global input) INTEGER
145* On entry, N specifies the number of columns of the submatrix
146* sub( A ). N must be at least zero.
147*
148* ALPHA (global input) pointer to CHAR
149* On entry, ALPHA specifies the scalar alpha. When ALPHA is
150* supplied as zero then the local entries of the array A cor-
151* responding to the entries of the submatrix sub( A ) need not
152* be set on input.
153*
154* A (local input) pointer to CHAR
155* On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
156* is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least
157* Lc( 1, JA+N-1 ). Before entry, this array contains the local
158* entries of the matrix A.
159*
160* IA (global input) INTEGER
161* On entry, IA specifies A's global row index, which points to
162* the beginning of the submatrix sub( A ).
163*
164* JA (global input) INTEGER
165* On entry, JA specifies A's global column index, which points
166* to the beginning of the submatrix sub( A ).
167*
168* DESCA (global and local input) INTEGER array
169* On entry, DESCA is an integer array of dimension DLEN_. This
170* is the array descriptor for the matrix A.
171*
172* AROC (global input) pointer to CHAR
173* On entry, AROC specifies the orientation of the subvector
174* sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector,
175* and a column vector otherwise.
176*
177* BETA (global input) pointer to CHAR
178* On entry, BETA specifies the scalar beta. When BETA is sup-
179* plied as zero then the local entries of the array B corres-
180* ponding to the entries of the submatrix sub( B ) need not be
181* set on input.
182*
183* B (local input/local output) pointer to CHAR
184* On entry, B is an array of dimension (LLD_B, Kb), where LLD_B
185* is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and
186* sub( B ) are both distributed along a process column or a
187* process row. In that case, Kb is at least Lc( 1, JB+N-1 ).
188* Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and
189* Kb is at least Lc( 1, JB+M-1 ). Before entry, this array
190* contains the local entries of the matrix B. On exit, sub( B )
191* is overwritten with the updated submatrix.
192*
193* IB (global input) INTEGER
194* On entry, IB specifies B's global row index, which points to
195* the beginning of the submatrix sub( B ).
196*
197* JB (global input) INTEGER
198* On entry, JB specifies B's global column index, which points
199* to the beginning of the submatrix sub( B ).
200*
201* DESCB (global and local input) INTEGER array
202* On entry, DESCB is an integer array of dimension DLEN_. This
203* is the array descriptor for the matrix B.
204*
205* BROC (global input) pointer to CHAR
206* On entry, BROC specifies the orientation of the subvector
207* sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector,
208* and a column vector otherwise.
209*
210* -- Written on April 1, 1998 by
211* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
212*
213* ---------------------------------------------------------------------
214*/
215/*
216* .. Local Scalars ..
217*/
218 char * one, * top, * zero;
219 Int Acol, Aii, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR,
220 AnprocsD, AprocR, Aroc, Arow, Bcol, Bii, Binb1D, BisR, BisRow,
221 Bjj, Bld, BmyprocD, BmyprocR, BnD, BnbD, BnpD, BnprocsD,
222 BprocD, BprocR, Broc, Brow, RRorCC, ctxt, k, kbb, kk, kn,
223 ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p, size;
224 MMADD_T add;
225/*
226* .. Local Arrays ..
227*/
228 char * buf = NULL;
229/* ..
230* .. Executable Statements ..
231*
232*/
233/*
234* Retrieve process grid information
235*/
236 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
237/*
238* Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
239*/
240 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
241 &Arow, &Acol );
242 if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 )
243 {
244 BnD = N; Ald = DESCA[LLD_];
245 AmyprocD = mycol; AnprocsD = npcol; AmyprocR = myrow; AprocR = Arow;
246 AisR = ( ( Arow == -1 ) || ( nprow == 1 ) );
247 }
248 else
249 {
250 BnD = M; Ald = DESCA[LLD_];
251 AmyprocD = myrow; AnprocsD = nprow; AmyprocR = mycol; AprocR = Acol;
252 AisR = ( ( Acol == -1 ) || ( npcol == 1 ) );
253 }
254/*
255* Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ...
256*/
257 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
258 &Brow, &Bcol );
259 if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 )
260 {
261 BnbD = DESCB[NB_]; Bld = DESCB[LLD_];
262 BprocD = Bcol; BmyprocD = mycol; BnprocsD = npcol;
263 BprocR = Brow; BmyprocR = myrow;
264 BisR = ( ( BprocR == -1 ) || ( nprow == 1 ) );
265 Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD );
266 }
267 else
268 {
269 BnbD = DESCB[MB_]; Bld = DESCB[LLD_];
270 BprocD = Brow; BmyprocD = myrow; BnprocsD = nprow;
271 BprocR = Bcol; BmyprocR = mycol;
272 BisR = ( ( BprocR == -1 ) || ( npcol == 1 ) );
273 Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD );
274 }
275/*
276* Are sub( A ) and sub( B ) both row or column vectors ?
277*/
278 RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) );
279/*
280* sub( A ) is not distributed and sub( B ) is distributed
281*/
282 if( !( AisR ) )
283 {
284/*
285* sub( A ) is not replicated. Since this operation is local if sub( A ) and
286* sub( B ) are both row or column vectors, choose BprocR = AprocR when RRorCC,
287* and BprocR = 0 otherwise.
288*/
289 if( BisR ) { BprocR = ( ( RRorCC ) ? AprocR : 0 ); }
290/*
291* Now, it is just like sub( B ) is not replicated, this information however is
292* kept in BisR for later use.
293*/
294 size = TYPE->size;
295
296 if( ( AmyprocR == AprocR ) || ( BmyprocR == BprocR ) )
297 {
298 zero = TYPE->zero; one = TYPE->one;
299/*
300* sub( A ) and sub( B ) are both row or column vectors
301*/
302 if( RRorCC )
303 {
304 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
305 else add = TYPE->Fmmadd;
306
307 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD,
308 BnprocsD );
309/*
310* sub( A ) and sub( B ) are in the same process row or column
311*/
312 if( AprocR == BprocR )
313 {
314/*
315* In each process, the non distributed part of sub( A ) is added to sub( B ).
316*/
317 if( BnpD > 0 )
318 {
319 Broc = BprocD;
320 if( AisRow ) { kk = Bjj; ktmp = JA + N; kn = JA + Binb1D; }
321 else { kk = Bii; ktmp = IA + M; kn = IA + Binb1D; }
322
323 if( BmyprocD == Broc )
324 {
325 if( AisRow )
326 add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
327 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
328 else
329 add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
330 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
331 kk += Binb1D;
332 }
333 Broc = MModAdd1( Broc, BnprocsD );
334
335 for( k = kn; k < ktmp; k += BnbD )
336 {
337 kbb = ktmp - k; kbb = MIN( kbb, BnbD );
338
339 if( BmyprocD == Broc )
340 {
341 if( AisRow )
342 add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ),
343 &Ald, BETA, Mptr( B, Bii, kk, Bld, size ),
344 &Bld );
345 else
346 add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ),
347 &Ald, BETA, Mptr( B, kk, Bjj, Bld, size ),
348 &Bld );
349 kk += kbb;
350 }
351 Broc = MModAdd1( Broc, BnprocsD );
352 }
353 }
354 }
355 else
356 {
357/*
358* sub( A ) and sub( B ) are in a different process row or column
359*/
360 if( BmyprocR == BprocR )
361 {
362/*
363* If I own a piece of sub( B ), then receive the relevant piece of sub( A )
364* from the corresponding process row or column where it resides.
365*/
366 if( BnpD > 0 )
367 {
368 if( BisRow )
369 {
370 buf = PB_Cmalloc( M * BnpD * size );
371 TYPE->Cgerv2d( ctxt, M, BnpD, buf, M, AprocR,
372 BmyprocD );
373 add( &M, &BnpD, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj,
374 Bld, size ), &Bld );
375 if( buf ) free( buf );
376 }
377 else
378 {
379 buf = PB_Cmalloc( BnpD * N * size );
380 TYPE->Cgerv2d( ctxt, BnpD, N, buf, BnpD, BmyprocD,
381 AprocR );
382 add( &BnpD, &N, ALPHA, buf, &BnpD, BETA, Mptr( B, Bii,
383 Bjj, Bld, size ), &Bld );
384 if( buf ) free( buf );
385 }
386 }
387 }
388
389 if( AmyprocR == AprocR )
390 {
391/*
392* If I own sub( A ), then pack and send the distributed part that should be
393* added to the distributed part of sub( B ) that resides in my row or column.
394*/
395 if( BnpD > 0 )
396 {
397 if( AisRow )
398 {
399 ktmp = JA + N;
400 kn = JA + Binb1D;
401 buf = PB_Cmalloc( M * BnpD * size );
402 }
403 else
404 {
405 ktmp = IA + M;
406 kn = IA + Binb1D;
407 buf = PB_Cmalloc( BnpD * N * size );
408 }
409 Broc = BprocD;
410 kk = 0;
411
412 if( BmyprocD == Broc )
413 {
414 if( AisRow )
415 add( &M, &Binb1D, one, Mptr( A, Aii, Ajj, Ald,
416 size ), &Ald, zero, buf, &M );
417 else
418 add( &Binb1D, &N, one, Mptr( A, Aii, Ajj, Ald,
419 size ), &Ald, zero, buf, &BnpD );
420 kk += Binb1D;
421 }
422
423 Broc = MModAdd1( Broc, BnprocsD );
424 for( k = kn; k < ktmp; k += BnbD )
425 {
426 kbb = ktmp - k; kbb = MIN( kbb, BnbD );
427
428 if( BmyprocD == Broc )
429 {
430 if( AisRow )
431 add( &M, &kbb, one, Mptr( A, Aii, k, Ald, size ),
432 &Ald, zero, Mptr( buf, 0, kk, M, size ),
433 &M );
434 else
435 add( &kbb, &N, one, Mptr( A, k, Ajj, Ald, size ),
436 &Ald, zero, Mptr( buf, kk, 0, BnpD, size ),
437 &BnpD );
438 kk += kbb;
439 }
440 Broc = MModAdd1( Broc, BnprocsD );
441 }
442
443 if( AisRow )
444 TYPE->Cgesd2d( ctxt, M, BnpD, buf, M, BprocR,
445 AmyprocD );
446 else
447 TYPE->Cgesd2d( ctxt, BnpD, N, buf, BnpD, AmyprocD,
448 BprocR );
449 if( buf ) free( buf );
450 }
451 }
452 }
453 }
454 else
455 {
456/*
457* sub( A ) and sub( B ) are not both row or column vectors
458*/
459 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
460 else add = TYPE->Fmmtadd;
461
462 Aroc = 0;
463 if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; }
464 else { ktmp = IA + M; kn = IA + Binb1D; }
465/*
466* Loop over the processes in which sub( B ) resides, for each process find the
467* next process Xroc. Exchange and add the data.
468*/
469 for( p = 0; p < BnprocsD; p++ )
470 {
471 mydist = MModSub( p, BprocD, BnprocsD );
472 myproc = MModAdd( BprocD, mydist, BnprocsD );
473
474 if( ( AprocR == p ) && ( BprocR == Aroc ) )
475 {
476 if( ( AmyprocR == p ) && ( AmyprocD == Aroc ) )
477 {
478/*
479* local add at the intersection of the process cross
480*/
481 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD,
482 BnprocsD );
483 if( BnpD > 0 )
484 {
485 Broc = BprocD;
486 kk = ( AisRow ? Bii : Bjj );
487
488 if( myproc == Broc )
489 {
490 if( AisRow )
491 add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald,
492 size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld,
493 size ), &Bld );
494 else
495 add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald,
496 size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld,
497 size ), &Bld );
498 kk += Binb1D;
499 }
500 Broc = MModAdd1( Broc, BnprocsD );
501
502 for( k = kn; k < ktmp; k += BnbD )
503 {
504 kbb = ktmp - k; kbb = MIN( kbb, BnbD );
505 if( myproc == Broc )
506 {
507 if( AisRow )
508 add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald,
509 size ), &Ald, BETA, Mptr( B, kk, Bjj, Bld,
510 size ), &Bld );
511 else
512 add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald,
513 size ), &Ald, BETA, Mptr( B, Bii, kk, Bld,
514 size ), &Bld );
515 kk += kbb;
516 }
517 Broc = MModAdd1( Broc, BnprocsD );
518 }
519 }
520 }
521 }
522 else
523 {
524/*
525* Message exchange
526*/
527 if( ( BmyprocR == BprocR ) && ( BmyprocD == p ) )
528 {
529 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD,
530 BnprocsD );
531 if( BnpD > 0 )
532 {
533 if( AisRow )
534 {
535 buf = PB_Cmalloc( M * BnpD * size );
536 TYPE->Cgerv2d( ctxt, BnpD, M, buf, BnpD, AprocR,
537 Aroc );
538 TYPE->Fmmadd( &BnpD, &M, ALPHA, buf, &BnpD, BETA,
539 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
540 }
541 else
542 {
543 buf = PB_Cmalloc( BnpD * N * size );
544 TYPE->Cgerv2d( ctxt, N, BnpD, buf, N, Aroc, AprocR );
545 TYPE->Fmmadd( &N, &BnpD, ALPHA, buf, &N, BETA,
546 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
547 }
548 if( buf ) free( buf );
549 }
550 }
551
552 if( ( AmyprocR == AprocR ) && ( AmyprocD == Aroc ) )
553 {
554 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD,
555 BnprocsD );
556 if( BnpD > 0 )
557 {
558 if( AisRow )
559 buf = PB_Cmalloc( M * BnpD * size );
560 else
561 buf = PB_Cmalloc( BnpD * N * size );
562 Broc = BprocD;
563 kk = 0;
564
565 if( myproc == Broc )
566 {
567 if( AisRow )
568 add( &M, &Binb1D, one, Mptr( A, Aii, Ajj, Ald,
569 size ), &Ald, zero, buf, &BnpD );
570 else
571 add( &Binb1D, &N, one, Mptr( A, Aii, Ajj, Ald,
572 size ), &Ald, zero, buf, &N );
573 kk += Binb1D;
574 }
575 Broc = MModAdd1( Broc, BnprocsD );
576
577 for( k = kn; k < ktmp; k += BnbD )
578 {
579 kbb = ktmp - k; kbb = MIN( kbb, BnbD );
580 if( myproc == Broc )
581 {
582 if( AisRow )
583 add( &M, &kbb, one, Mptr( A, Aii, k, Ald,
584 size ), &Ald, zero, Mptr( buf, kk, 0,
585 BnpD, size ), &BnpD );
586 else
587 add( &kbb, &N, one, Mptr( A, k, Ajj, Ald,
588 size ), &Ald, zero, Mptr( buf, 0, kk,
589 N, size ), &N );
590 kk += kbb;
591 }
592 Broc = MModAdd1( Broc, BnprocsD );
593 }
594 if( AisRow )
595 TYPE->Cgesd2d( ctxt, BnpD, M, buf, BnpD, p, BprocR );
596 else
597 TYPE->Cgesd2d( ctxt, N, BnpD, buf, N, BprocR, p );
598 if( buf ) free( buf );
599 }
600 }
601 }
602 Aroc = MModAdd1( Aroc, AnprocsD );
603 }
604 }
605 }
606
607 if( BisR )
608 {
609/*
610* Replicate sub( B )
611*/
612 BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD, BnprocsD );
613 if( BnpD > 0 )
614 {
615 if( BisRow )
616 {
617 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
618 if( BmyprocR == BprocR )
619 TYPE->Cgebs2d( ctxt, COLUMN, top, ( AisRow ? M : N ), BnpD,
620 Mptr( B, Bii, Bjj, Bld, size ), Bld );
621 else
622 TYPE->Cgebr2d( ctxt, COLUMN, top, ( AisRow ? M : N ), BnpD,
623 Mptr( B, Bii, Bjj, Bld, size ), Bld, BprocR,
624 BmyprocD );
625 }
626 else
627 {
628 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
629 if( BmyprocR == BprocR )
630 TYPE->Cgebs2d( ctxt, ROW, top, BnpD, ( AisRow ? M : N ),
631 Mptr( B, Bii, Bjj, Bld, size ), Bld );
632 else
633 TYPE->Cgebr2d( ctxt, ROW, top, BnpD, ( AisRow ? M : N ),
634 Mptr( B, Bii, Bjj, Bld, size ), Bld, BmyprocD,
635 BprocR );
636 }
637 }
638 }
639 }
640 else
641 {
642/*
643* sub( A ) is replicated in every process. Add the data in process row or
644* column BprocR when sub( B ) is not replicated and in every process otherwise.
645*/
646 if( !( BisR ) && ( BmyprocR != BprocR ) ) return;
647
648 size = TYPE->size;
649
650 if( RRorCC )
651 {
652 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
653 else add = TYPE->Fmmadd;
654 }
655 else
656 {
657 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
658 else add = TYPE->Fmmtadd;
659 }
660
661 Broc = BprocD;
662 kk = ( BisRow ? Bjj : Bii );
663 if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; }
664 else { ktmp = IA + M; kn = IA + Binb1D; }
665
666 if( BmyprocD == Broc )
667 {
668 if( AisRow )
669 add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA,
670 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
671 else
672 add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA,
673 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
674 kk += Binb1D;
675 }
676 Broc = MModAdd1( Broc, BnprocsD );
677
678 for( k = kn; k < ktmp; k += BnbD )
679 {
680 kbb = ktmp - k; kbb = MIN( kbb, BnbD );
681 if( BmyprocD == Broc )
682 {
683 if( BisRow ) { buf = Mptr( B, Bii, kk, Bld, size ); }
684 else { buf = Mptr( B, kk, Bjj, Bld, size ); }
685 if( AisRow )
686 add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ), &Ald, BETA,
687 buf, &Bld );
688 else
689 add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ), &Ald, BETA,
690 buf, &Bld );
691 kk += kbb;
692 }
693 Broc = MModAdd1( Broc, BnprocsD );
694 }
695 }
696/*
697* End of PB_CpaxpbyND
698*/
699}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define CNOCONJG
Definition PBblas.h:19
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
void PB_CpaxpbyND()
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
char * PB_Ctop()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define MModAdd(I1, I2, d)
Definition PBtools.h:97
#define INB_
Definition PBtools.h:42
#define IMB_
Definition PBtools.h:41
#define Mupcase(C)
Definition PBtools.h:83
#define NB_
Definition PBtools.h:44
#define TYPE
Definition clamov.c:7