ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CpaxpbyND.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_CpaxpbyND( PBTYP_T * TYPE, char * CONJUG, int M, int N,
21  char * ALPHA,
22  char * A, int IA, int JA, int * DESCA, char * AROC,
23  char * BETA,
24  char * B, int IB, int JB, int * DESCB, char * BROC )
25 #else
26 void PB_CpaxpbyND( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC,
27  BETA, B, IB, JB, DESCB, BROC )
28 /*
29 * .. Scalar Arguments ..
30 */
31  char * AROC, * BROC, * CONJUG;
32  int IA, IB, JA, JB, M, N;
33  char * ALPHA, * BETA;
34  PBTYP_T * TYPE;
35 /*
36 * .. Array Arguments ..
37 */
38  int * DESCA, * DESCB;
39  char * A, * B;
40 #endif
41 {
42 /*
43 * Purpose
44 * =======
45 *
46 * PB_CpaxpbyND adds one submatrix to another,
47 *
48 * sub( B ) := beta * sub( B ) + alpha * sub( A ), or,
49 *
50 * sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ),
51 *
52 * where sub( A ) is not distributed and sub( B ) is distributed.
53 *
54 * sub( A ) always denotes A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or
55 * 'r' sub( A ) resides in a process row, otherwise sub( A ) resides in
56 * a process column. When sub( A ) resides in a process row and BROC is
57 * 'R' or 'r' or sub( A ) resides in a process column and BROC is 'C' or
58 * 'c', then sub( B ) denotes B( IB:IB+M-1, JB:JB+N-1 ), and otherwise
59 * sub( B ) denotes B(IB:IB+N-1,JB:JB+M-1).
60 * otherwise.
61 *
62 * Notes
63 * =====
64 *
65 * A description vector is associated with each 2D block-cyclicly dis-
66 * tributed matrix. This vector stores the information required to
67 * establish the mapping between a matrix entry and its corresponding
68 * process and memory location.
69 *
70 * In the following comments, the character _ should be read as
71 * "of the distributed matrix". Let A be a generic term for any 2D
72 * block cyclicly distributed matrix. Its description vector is DESC_A:
73 *
74 * NOTATION STORED IN EXPLANATION
75 * ---------------- --------------- ------------------------------------
76 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
77 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
78 * the NPROW x NPCOL BLACS process grid
79 * A is distributed over. The context
80 * itself is global, but the handle
81 * (the integer value) may vary.
82 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
83 * ted matrix A, M_A >= 0.
84 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
85 * buted matrix A, N_A >= 0.
86 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
87 * block of the matrix A, IMB_A > 0.
88 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
89 * left block of the matrix A,
90 * INB_A > 0.
91 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
92 * bute the last M_A-IMB_A rows of A,
93 * MB_A > 0.
94 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
95 * bute the last N_A-INB_A columns of
96 * A, NB_A > 0.
97 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
98 * row of the matrix A is distributed,
99 * NPROW > RSRC_A >= 0.
100 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
101 * first column of A is distributed.
102 * NPCOL > CSRC_A >= 0.
103 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
104 * array storing the local blocks of
105 * the distributed matrix A,
106 * IF( Lc( 1, N_A ) > 0 )
107 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
108 * ELSE
109 * LLD_A >= 1.
110 *
111 * Let K be the number of rows of a matrix A starting at the global in-
112 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
113 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
114 * receive if these K rows were distributed over NPROW processes. If K
115 * is the number of columns of a matrix A starting at the global index
116 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
117 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
118 * these K columns were distributed over NPCOL processes.
119 *
120 * The values of Lr() and Lc() may be determined via a call to the func-
121 * tion PB_Cnumroc:
122 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
123 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
124 *
125 * Arguments
126 * =========
127 *
128 * TYPE (local input) pointer to a PBTYP_T structure
129 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
130 * that contains type information (See pblas.h).
131 *
132 * CONJUG (global input) pointer to CHAR
133 * On entry, CONJUG specifies whether conjg( sub( A ) ) or
134 * sub( A ) should be added to sub( B ) as follows:
135 * CONJUG = 'N' or 'n':
136 * sub( B ) := beta*sub( B ) + alpha*sub( A ),
137 * otherwise
138 * sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ).
139 *
140 * M (global input) INTEGER
141 * On entry, M specifies the number of rows of the submatrix
142 * sub( A ). M must be at least zero.
143 *
144 * N (global input) INTEGER
145 * On entry, N specifies the number of columns of the submatrix
146 * sub( A ). N must be at least zero.
147 *
148 * ALPHA (global input) pointer to CHAR
149 * On entry, ALPHA specifies the scalar alpha. When ALPHA is
150 * supplied as zero then the local entries of the array A cor-
151 * responding to the entries of the submatrix sub( A ) need not
152 * be set on input.
153 *
154 * A (local input) pointer to CHAR
155 * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
156 * is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least
157 * Lc( 1, JA+N-1 ). Before entry, this array contains the local
158 * entries of the matrix A.
159 *
160 * IA (global input) INTEGER
161 * On entry, IA specifies A's global row index, which points to
162 * the beginning of the submatrix sub( A ).
163 *
164 * JA (global input) INTEGER
165 * On entry, JA specifies A's global column index, which points
166 * to the beginning of the submatrix sub( A ).
167 *
168 * DESCA (global and local input) INTEGER array
169 * On entry, DESCA is an integer array of dimension DLEN_. This
170 * is the array descriptor for the matrix A.
171 *
172 * AROC (global input) pointer to CHAR
173 * On entry, AROC specifies the orientation of the subvector
174 * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector,
175 * and a column vector otherwise.
176 *
177 * BETA (global input) pointer to CHAR
178 * On entry, BETA specifies the scalar beta. When BETA is sup-
179 * plied as zero then the local entries of the array B corres-
180 * ponding to the entries of the submatrix sub( B ) need not be
181 * set on input.
182 *
183 * B (local input/local output) pointer to CHAR
184 * On entry, B is an array of dimension (LLD_B, Kb), where LLD_B
185 * is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and
186 * sub( B ) are both distributed along a process column or a
187 * process row. In that case, Kb is at least Lc( 1, JB+N-1 ).
188 * Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and
189 * Kb is at least Lc( 1, JB+M-1 ). Before entry, this array
190 * contains the local entries of the matrix B. On exit, sub( B )
191 * is overwritten with the updated submatrix.
192 *
193 * IB (global input) INTEGER
194 * On entry, IB specifies B's global row index, which points to
195 * the beginning of the submatrix sub( B ).
196 *
197 * JB (global input) INTEGER
198 * On entry, JB specifies B's global column index, which points
199 * to the beginning of the submatrix sub( B ).
200 *
201 * DESCB (global and local input) INTEGER array
202 * On entry, DESCB is an integer array of dimension DLEN_. This
203 * is the array descriptor for the matrix B.
204 *
205 * BROC (global input) pointer to CHAR
206 * On entry, BROC specifies the orientation of the subvector
207 * sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector,
208 * and a column vector otherwise.
209 *
210 * -- Written on April 1, 1998 by
211 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
212 *
213 * ---------------------------------------------------------------------
214 */
215 /*
216 * .. Local Scalars ..
217 */
218  char * one, * top, * zero;
219  int Acol, Aii, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR,
220  AnprocsD, AprocR, Aroc, Arow, Bcol, Bii, Binb1D, BisR, BisRow,
221  Bjj, Bld, BmyprocD, BmyprocR, BnD, BnbD, BnpD, BnprocsD,
222  BprocD, BprocR, Broc, Brow, RRorCC, ctxt, k, kbb, kk, kn,
223  ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p, size;
224  MMADD_T add;
225 /*
226 * .. Local Arrays ..
227 */
228  char * buf = NULL;
229 /* ..
230 * .. Executable Statements ..
231 *
232 */
233 /*
234 * Retrieve process grid information
235 */
236  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
237 /*
238 * Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
239 */
240  PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
241  &Arow, &Acol );
242  if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 )
243  {
244  BnD = N; Ald = DESCA[LLD_];
245  AmyprocD = mycol; AnprocsD = npcol; AmyprocR = myrow; AprocR = Arow;
246  AisR = ( ( Arow == -1 ) || ( nprow == 1 ) );
247  }
248  else
249  {
250  BnD = M; Ald = DESCA[LLD_];
251  AmyprocD = myrow; AnprocsD = nprow; AmyprocR = mycol; AprocR = Acol;
252  AisR = ( ( Acol == -1 ) || ( npcol == 1 ) );
253  }
254 /*
255 * Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ...
256 */
257  PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
258  &Brow, &Bcol );
259  if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 )
260  {
261  BnbD = DESCB[NB_]; Bld = DESCB[LLD_];
262  BprocD = Bcol; BmyprocD = mycol; BnprocsD = npcol;
263  BprocR = Brow; BmyprocR = myrow;
264  BisR = ( ( BprocR == -1 ) || ( nprow == 1 ) );
265  Binb1D = PB_Cfirstnb( BnD, JB, DESCB[INB_], BnbD );
266  }
267  else
268  {
269  BnbD = DESCB[MB_]; Bld = DESCB[LLD_];
270  BprocD = Brow; BmyprocD = myrow; BnprocsD = nprow;
271  BprocR = Bcol; BmyprocR = mycol;
272  BisR = ( ( BprocR == -1 ) || ( npcol == 1 ) );
273  Binb1D = PB_Cfirstnb( BnD, IB, DESCB[IMB_], BnbD );
274  }
275 /*
276 * Are sub( A ) and sub( B ) both row or column vectors ?
277 */
278  RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) );
279 /*
280 * sub( A ) is not distributed and sub( B ) is distributed
281 */
282  if( !( AisR ) )
283  {
284 /*
285 * sub( A ) is not replicated. Since this operation is local if sub( A ) and
286 * sub( B ) are both row or column vectors, choose BprocR = AprocR when RRorCC,
287 * and BprocR = 0 otherwise.
288 */
289  if( BisR ) { BprocR = ( ( RRorCC ) ? AprocR : 0 ); }
290 /*
291 * Now, it is just like sub( B ) is not replicated, this information however is
292 * kept in BisR for later use.
293 */
294  size = TYPE->size;
295 
296  if( ( AmyprocR == AprocR ) || ( BmyprocR == BprocR ) )
297  {
298  zero = TYPE->zero; one = TYPE->one;
299 /*
300 * sub( A ) and sub( B ) are both row or column vectors
301 */
302  if( RRorCC )
303  {
304  if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
305  else add = TYPE->Fmmadd;
306 
307  BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD,
308  BnprocsD );
309 /*
310 * sub( A ) and sub( B ) are in the same process row or column
311 */
312  if( AprocR == BprocR )
313  {
314 /*
315 * In each process, the non distributed part of sub( A ) is added to sub( B ).
316 */
317  if( BnpD > 0 )
318  {
319  Broc = BprocD;
320  if( AisRow ) { kk = Bjj; ktmp = JA + N; kn = JA + Binb1D; }
321  else { kk = Bii; ktmp = IA + M; kn = IA + Binb1D; }
322 
323  if( BmyprocD == Broc )
324  {
325  if( AisRow )
326  add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
327  &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
328  else
329  add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
330  &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
331  kk += Binb1D;
332  }
333  Broc = MModAdd1( Broc, BnprocsD );
334 
335  for( k = kn; k < ktmp; k += BnbD )
336  {
337  kbb = ktmp - k; kbb = MIN( kbb, BnbD );
338 
339  if( BmyprocD == Broc )
340  {
341  if( AisRow )
342  add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ),
343  &Ald, BETA, Mptr( B, Bii, kk, Bld, size ),
344  &Bld );
345  else
346  add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ),
347  &Ald, BETA, Mptr( B, kk, Bjj, Bld, size ),
348  &Bld );
349  kk += kbb;
350  }
351  Broc = MModAdd1( Broc, BnprocsD );
352  }
353  }
354  }
355  else
356  {
357 /*
358 * sub( A ) and sub( B ) are in a different process row or column
359 */
360  if( BmyprocR == BprocR )
361  {
362 /*
363 * If I own a piece of sub( B ), then receive the relevant piece of sub( A )
364 * from the corresponding process row or column where it resides.
365 */
366  if( BnpD > 0 )
367  {
368  if( BisRow )
369  {
370  buf = PB_Cmalloc( M * BnpD * size );
371  TYPE->Cgerv2d( ctxt, M, BnpD, buf, M, AprocR,
372  BmyprocD );
373  add( &M, &BnpD, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj,
374  Bld, size ), &Bld );
375  if( buf ) free( buf );
376  }
377  else
378  {
379  buf = PB_Cmalloc( BnpD * N * size );
380  TYPE->Cgerv2d( ctxt, BnpD, N, buf, BnpD, BmyprocD,
381  AprocR );
382  add( &BnpD, &N, ALPHA, buf, &BnpD, BETA, Mptr( B, Bii,
383  Bjj, Bld, size ), &Bld );
384  if( buf ) free( buf );
385  }
386  }
387  }
388 
389  if( AmyprocR == AprocR )
390  {
391 /*
392 * If I own sub( A ), then pack and send the distributed part that should be
393 * added to the distributed part of sub( B ) that resides in my row or column.
394 */
395  if( BnpD > 0 )
396  {
397  if( AisRow )
398  {
399  ktmp = JA + N;
400  kn = JA + Binb1D;
401  buf = PB_Cmalloc( M * BnpD * size );
402  }
403  else
404  {
405  ktmp = IA + M;
406  kn = IA + Binb1D;
407  buf = PB_Cmalloc( BnpD * N * size );
408  }
409  Broc = BprocD;
410  kk = 0;
411 
412  if( BmyprocD == Broc )
413  {
414  if( AisRow )
415  add( &M, &Binb1D, one, Mptr( A, Aii, Ajj, Ald,
416  size ), &Ald, zero, buf, &M );
417  else
418  add( &Binb1D, &N, one, Mptr( A, Aii, Ajj, Ald,
419  size ), &Ald, zero, buf, &BnpD );
420  kk += Binb1D;
421  }
422 
423  Broc = MModAdd1( Broc, BnprocsD );
424  for( k = kn; k < ktmp; k += BnbD )
425  {
426  kbb = ktmp - k; kbb = MIN( kbb, BnbD );
427 
428  if( BmyprocD == Broc )
429  {
430  if( AisRow )
431  add( &M, &kbb, one, Mptr( A, Aii, k, Ald, size ),
432  &Ald, zero, Mptr( buf, 0, kk, M, size ),
433  &M );
434  else
435  add( &kbb, &N, one, Mptr( A, k, Ajj, Ald, size ),
436  &Ald, zero, Mptr( buf, kk, 0, BnpD, size ),
437  &BnpD );
438  kk += kbb;
439  }
440  Broc = MModAdd1( Broc, BnprocsD );
441  }
442 
443  if( AisRow )
444  TYPE->Cgesd2d( ctxt, M, BnpD, buf, M, BprocR,
445  AmyprocD );
446  else
447  TYPE->Cgesd2d( ctxt, BnpD, N, buf, BnpD, AmyprocD,
448  BprocR );
449  if( buf ) free( buf );
450  }
451  }
452  }
453  }
454  else
455  {
456 /*
457 * sub( A ) and sub( B ) are not both row or column vectors
458 */
459  if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
460  else add = TYPE->Fmmtadd;
461 
462  Aroc = 0;
463  if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; }
464  else { ktmp = IA + M; kn = IA + Binb1D; }
465 /*
466 * Loop over the processes in which sub( B ) resides, for each process find the
467 * next process Xroc. Exchange and add the data.
468 */
469  for( p = 0; p < BnprocsD; p++ )
470  {
471  mydist = MModSub( p, BprocD, BnprocsD );
472  myproc = MModAdd( BprocD, mydist, BnprocsD );
473 
474  if( ( AprocR == p ) && ( BprocR == Aroc ) )
475  {
476  if( ( AmyprocR == p ) && ( AmyprocD == Aroc ) )
477  {
478 /*
479 * local add at the intersection of the process cross
480 */
481  BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD,
482  BnprocsD );
483  if( BnpD > 0 )
484  {
485  Broc = BprocD;
486  kk = ( AisRow ? Bii : Bjj );
487 
488  if( myproc == Broc )
489  {
490  if( AisRow )
491  add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald,
492  size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld,
493  size ), &Bld );
494  else
495  add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald,
496  size ), &Ald, BETA, Mptr( B, Bii, Bjj, Bld,
497  size ), &Bld );
498  kk += Binb1D;
499  }
500  Broc = MModAdd1( Broc, BnprocsD );
501 
502  for( k = kn; k < ktmp; k += BnbD )
503  {
504  kbb = ktmp - k; kbb = MIN( kbb, BnbD );
505  if( myproc == Broc )
506  {
507  if( AisRow )
508  add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald,
509  size ), &Ald, BETA, Mptr( B, kk, Bjj, Bld,
510  size ), &Bld );
511  else
512  add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald,
513  size ), &Ald, BETA, Mptr( B, Bii, kk, Bld,
514  size ), &Bld );
515  kk += kbb;
516  }
517  Broc = MModAdd1( Broc, BnprocsD );
518  }
519  }
520  }
521  }
522  else
523  {
524 /*
525 * Message exchange
526 */
527  if( ( BmyprocR == BprocR ) && ( BmyprocD == p ) )
528  {
529  BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD,
530  BnprocsD );
531  if( BnpD > 0 )
532  {
533  if( AisRow )
534  {
535  buf = PB_Cmalloc( M * BnpD * size );
536  TYPE->Cgerv2d( ctxt, BnpD, M, buf, BnpD, AprocR,
537  Aroc );
538  TYPE->Fmmadd( &BnpD, &M, ALPHA, buf, &BnpD, BETA,
539  Mptr( B, Bii, Bjj, Bld, size ), &Bld );
540  }
541  else
542  {
543  buf = PB_Cmalloc( BnpD * N * size );
544  TYPE->Cgerv2d( ctxt, N, BnpD, buf, N, Aroc, AprocR );
545  TYPE->Fmmadd( &N, &BnpD, ALPHA, buf, &N, BETA,
546  Mptr( B, Bii, Bjj, Bld, size ), &Bld );
547  }
548  if( buf ) free( buf );
549  }
550  }
551 
552  if( ( AmyprocR == AprocR ) && ( AmyprocD == Aroc ) )
553  {
554  BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, p, BprocD,
555  BnprocsD );
556  if( BnpD > 0 )
557  {
558  if( AisRow )
559  buf = PB_Cmalloc( M * BnpD * size );
560  else
561  buf = PB_Cmalloc( BnpD * N * size );
562  Broc = BprocD;
563  kk = 0;
564 
565  if( myproc == Broc )
566  {
567  if( AisRow )
568  add( &M, &Binb1D, one, Mptr( A, Aii, Ajj, Ald,
569  size ), &Ald, zero, buf, &BnpD );
570  else
571  add( &Binb1D, &N, one, Mptr( A, Aii, Ajj, Ald,
572  size ), &Ald, zero, buf, &N );
573  kk += Binb1D;
574  }
575  Broc = MModAdd1( Broc, BnprocsD );
576 
577  for( k = kn; k < ktmp; k += BnbD )
578  {
579  kbb = ktmp - k; kbb = MIN( kbb, BnbD );
580  if( myproc == Broc )
581  {
582  if( AisRow )
583  add( &M, &kbb, one, Mptr( A, Aii, k, Ald,
584  size ), &Ald, zero, Mptr( buf, kk, 0,
585  BnpD, size ), &BnpD );
586  else
587  add( &kbb, &N, one, Mptr( A, k, Ajj, Ald,
588  size ), &Ald, zero, Mptr( buf, 0, kk,
589  N, size ), &N );
590  kk += kbb;
591  }
592  Broc = MModAdd1( Broc, BnprocsD );
593  }
594  if( AisRow )
595  TYPE->Cgesd2d( ctxt, BnpD, M, buf, BnpD, p, BprocR );
596  else
597  TYPE->Cgesd2d( ctxt, N, BnpD, buf, N, BprocR, p );
598  if( buf ) free( buf );
599  }
600  }
601  }
602  Aroc = MModAdd1( Aroc, AnprocsD );
603  }
604  }
605  }
606 
607  if( BisR )
608  {
609 /*
610 * Replicate sub( B )
611 */
612  BnpD = PB_Cnumroc( BnD, 0, Binb1D, BnbD, BmyprocD, BprocD, BnprocsD );
613  if( BnpD > 0 )
614  {
615  if( BisRow )
616  {
617  top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
618  if( BmyprocR == BprocR )
619  TYPE->Cgebs2d( ctxt, COLUMN, top, ( AisRow ? M : N ), BnpD,
620  Mptr( B, Bii, Bjj, Bld, size ), Bld );
621  else
622  TYPE->Cgebr2d( ctxt, COLUMN, top, ( AisRow ? M : N ), BnpD,
623  Mptr( B, Bii, Bjj, Bld, size ), Bld, BprocR,
624  BmyprocD );
625  }
626  else
627  {
628  top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
629  if( BmyprocR == BprocR )
630  TYPE->Cgebs2d( ctxt, ROW, top, BnpD, ( AisRow ? M : N ),
631  Mptr( B, Bii, Bjj, Bld, size ), Bld );
632  else
633  TYPE->Cgebr2d( ctxt, ROW, top, BnpD, ( AisRow ? M : N ),
634  Mptr( B, Bii, Bjj, Bld, size ), Bld, BmyprocD,
635  BprocR );
636  }
637  }
638  }
639  }
640  else
641  {
642 /*
643 * sub( A ) is replicated in every process. Add the data in process row or
644 * column BprocR when sub( B ) is not replicated and in every process otherwise.
645 */
646  if( !( BisR ) && ( BmyprocR != BprocR ) ) return;
647 
648  size = TYPE->size;
649 
650  if( RRorCC )
651  {
652  if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
653  else add = TYPE->Fmmadd;
654  }
655  else
656  {
657  if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
658  else add = TYPE->Fmmtadd;
659  }
660 
661  Broc = BprocD;
662  kk = ( BisRow ? Bjj : Bii );
663  if( AisRow ) { ktmp = JA + N; kn = JA + Binb1D; }
664  else { ktmp = IA + M; kn = IA + Binb1D; }
665 
666  if( BmyprocD == Broc )
667  {
668  if( AisRow )
669  add( &M, &Binb1D, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA,
670  Mptr( B, Bii, Bjj, Bld, size ), &Bld );
671  else
672  add( &Binb1D, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA,
673  Mptr( B, Bii, Bjj, Bld, size ), &Bld );
674  kk += Binb1D;
675  }
676  Broc = MModAdd1( Broc, BnprocsD );
677 
678  for( k = kn; k < ktmp; k += BnbD )
679  {
680  kbb = ktmp - k; kbb = MIN( kbb, BnbD );
681  if( BmyprocD == Broc )
682  {
683  if( BisRow ) { buf = Mptr( B, Bii, kk, Bld, size ); }
684  else { buf = Mptr( B, kk, Bjj, Bld, size ); }
685  if( AisRow )
686  add( &M, &kbb, ALPHA, Mptr( A, Aii, k, Ald, size ), &Ald, BETA,
687  buf, &Bld );
688  else
689  add( &kbb, &N, ALPHA, Mptr( A, k, Ajj, Ald, size ), &Ald, BETA,
690  buf, &Bld );
691  kk += kbb;
692  }
693  Broc = MModAdd1( Broc, BnprocsD );
694  }
695  }
696 /*
697 * End of PB_CpaxpbyND
698 */
699 }
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
COLUMN
#define COLUMN
Definition: PBblacs.h:45
PB_CpaxpbyND
void PB_CpaxpbyND(PBTYP_T *TYPE, char *CONJUG, int M, int N, char *ALPHA, char *A, int IA, int JA, int *DESCA, char *AROC, char *BETA, char *B, int IB, int JB, int *DESCB, char *BROC)
Definition: PB_CpaxpbyND.c:26
PB_Cfirstnb
int PB_Cfirstnb()
LLD_
#define LLD_
Definition: PBtools.h:47
CNOCONJG
#define CNOCONJG
Definition: PBblas.h:19
MModAdd
#define MModAdd(I1, I2, d)
Definition: PBtools.h:97
CROW
#define CROW
Definition: PBblacs.h:21
IMB_
#define IMB_
Definition: PBtools.h:41
MModSub
#define MModSub(I1, I2, d)
Definition: PBtools.h:102
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
BCAST
#define BCAST
Definition: PBblacs.h:48
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
INB_
#define INB_
Definition: PBtools.h:42
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38