ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_CGatherV.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_CGatherV( PBTYP_T * TYPE, char * ALLOC, char * DIRECA, int M, int N,
21  char * A, int IA, int JA, int * DESCA, char * AROC,
22  char * * B, int * DESCB, int * BFREE )
23 #else
24 void PB_CGatherV( TYPE, ALLOC, DIRECA, M, N, A, IA, JA, DESCA, AROC, B,
25  DESCB, BFREE )
26 /*
27 * .. Scalar Arguments ..
28 */
29  char * ALLOC, * AROC, * DIRECA;
30  int * BFREE, IA, JA, M, N;
31  PBTYP_T * TYPE;
32 /*
33 * .. Array Arguments ..
34 */
35  int * DESCA, * DESCB;
36  char * A, * * B;
37 #endif
38 {
39 /*
40 * Purpose
41 * =======
42 *
43 * PB_CGatherV aggregates a submatrix sub( A ) = A(IA:IA+M-1, JA:JA+N-1)
44 * into a one-dimensional multivector B. The submatrix sub( A ) is spe-
45 * cified on input to this routine that is reused whenever possible. On
46 * return, the one-dimensional multivector is specified by a pointer to
47 * some data, a descriptor array describing its layout and a logical va-
48 * lue indicating if this local piece of data has been dynamically allo-
49 * cated by this function.
50 *
51 * Notes
52 * =====
53 *
54 * A description vector is associated with each 2D block-cyclicly dis-
55 * tributed matrix. This vector stores the information required to
56 * establish the mapping between a matrix entry and its corresponding
57 * process and memory location.
58 *
59 * In the following comments, the character _ should be read as
60 * "of the distributed matrix". Let A be a generic term for any 2D
61 * block cyclicly distributed matrix. Its description vector is DESC_A:
62 *
63 * NOTATION STORED IN EXPLANATION
64 * ---------------- --------------- ------------------------------------
65 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
67 * the NPROW x NPCOL BLACS process grid
68 * A is distributed over. The context
69 * itself is global, but the handle
70 * (the integer value) may vary.
71 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
72 * ted matrix A, M_A >= 0.
73 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
74 * buted matrix A, N_A >= 0.
75 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
76 * block of the matrix A, IMB_A > 0.
77 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
78 * left block of the matrix A,
79 * INB_A > 0.
80 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
81 * bute the last M_A-IMB_A rows of A,
82 * MB_A > 0.
83 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
84 * bute the last N_A-INB_A columns of
85 * A, NB_A > 0.
86 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
87 * row of the matrix A is distributed,
88 * NPROW > RSRC_A >= 0.
89 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
90 * first column of A is distributed.
91 * NPCOL > CSRC_A >= 0.
92 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
93 * array storing the local blocks of
94 * the distributed matrix A,
95 * IF( Lc( 1, N_A ) > 0 )
96 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
97 * ELSE
98 * LLD_A >= 1.
99 *
100 * Let K be the number of rows of a matrix A starting at the global in-
101 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103 * receive if these K rows were distributed over NPROW processes. If K
104 * is the number of columns of a matrix A starting at the global index
105 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
106 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
107 * these K columns were distributed over NPCOL processes.
108 *
109 * The values of Lr() and Lc() may be determined via a call to the func-
110 * tion PB_Cnumroc:
111 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113 *
114 * Arguments
115 * =========
116 *
117 * TYPE (local input) pointer to a PBTYP_T structure
118 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
119 * that contains type information (See pblas.h).
120 *
121 * ALLOC (global input) pointer to CHAR
122 * On entry, ALLOC specifies if data should be allocated even
123 * when unnecessary as follows:
124 * ALLOC = 'A' or 'a' data allocation is enforced,
125 * ALLOC = 'R' or 'r' data is reused when possible.
126 *
127 * DIRECA (global input) pointer to CHAR
128 * On entry, DIRECA specifies the direction in which the rows
129 * or columns of sub( A ) should be aggregated as follows:
130 * DIRECA = 'F' or 'f' forward or increasing,
131 * DIRECA = 'B' or 'b' backward or decreasing.
132 *
133 * M (global input) INTEGER
134 * On entry, M specifies the number of rows of the submatrix
135 * sub( A ). M must be at least zero.
136 *
137 * N (global input) INTEGER
138 * On entry, N specifies the number of columns of the submatrix
139 * sub( A ). N must be at least zero.
140 *
141 * A (local input) pointer to CHAR
142 * On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
143 * is DESCA[LLD_], i.e. at least MAX( 1, Lr( M, IA ) ), and,
144 * Ka is at least Lc( N, JA ). Before entry, this array contains
145 * the local entries of the matrix A.
146 *
147 * IA (global input) INTEGER
148 * On entry, IA specifies A's global row index, which points to
149 * the beginning of the submatrix sub( A ).
150 *
151 * JA (global input) INTEGER
152 * On entry, JA specifies A's global column index, which points
153 * to the beginning of the submatrix sub( A ).
154 *
155 * DESCA (global and local input) INTEGER array
156 * On entry, DESCA is an integer array of dimension DLEN_. This
157 * is the array descriptor for the matrix A.
158 *
159 * AROC (global input) pointer to CHAR
160 * On entry, AROC specifies the orientation of the submatrix
161 * sub( A ). When AROC is 'R' or 'r', sub( A ) is a row matrix,
162 * and a column matrix otherwise.
163 *
164 * B (local output) pointer to pointer to CHAR
165 * On exit, * B is an array containing the aggregated submatrix
166 * sub( A ).
167 *
168 * DESCB (global and local output) INTEGER array
169 * On exit, DESCB is a descriptor array of dimension DLEN_ des-
170 * cribing the data layout of the data pointed to by * B.
171 *
172 * BFREE (local output) INTEGER
173 * On exit, BFREE specifies if it has been possible to reuse
174 * the submatrix sub( A ), i.e., if some dynamic memory was al-
175 * located for the data pointed to by *B or not. When BFREE is
176 * zero, no dynamic memory was allocated. Otherwise, some dyna-
177 * mic memory was allocated by this function that one MUST re-
178 * lease as soon as possible.
179 *
180 * -- Written on April 1, 1998 by
181 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
182 *
183 * ---------------------------------------------------------------------
184 */
185 /*
186 * .. Local Scalars ..
187 */
188  char * one, * zero;
189  int Afwd, AggRow, AiiD, AiiR, Ainb1D, Ainb1R, Ald, AmyprocD,
190  AmyprocR, AnR, AnbD, AnbR, AnnxtL, AnnxtR, AnpD, AnpR, AnpreR,
191  AnprocsR, ArocR, AsrcD, AsrcR, Bld, Bsrc_, ctxt, k, kb, kblks,
192  kn, ktmp, mycol, mydist, mydistnb, myrow, nlen, npcol, nprow,
193  offset, size, srcdist;
194  MMADD_T add;
195  MMSHFT_T shft;
196 /*
197 * .. Local Arrays ..
198 */
199  char * Aptr = NULL, * Bptr = NULL;
200 /* ..
201 * .. Executable Statements ..
202 *
203 */
204 /*
205 * Initialize the output parameters to a default value
206 */
207  *BFREE = 0;
208  *B = NULL;
209 /*
210 * Quick return if possible
211 */
212  if( ( M <= 0 ) || ( N <= 0 ) )
213  {
214  PB_Cdescset( DESCB, M, N, DESCA[IMB_], DESCA[INB_], DESCA[MB_],
215  DESCA[NB_], DESCA[RSRC_], DESCA[CSRC_], DESCA[CTXT_], 1 );
216  return;
217  }
218 /*
219 * Retrieve process grid information
220 */
221  Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
222 
223  if( ( AggRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 )
224  {
225 /*
226 * Accumulate rows of sub( A )
227 */
228  AnbR = DESCA[MB_]; AnbD = DESCA[NB_]; Ald = DESCA[LLD_];
229  PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiR, &AiiD,
230  &AsrcR, &AsrcD );
231  Ainb1D = PB_Cfirstnb( N, JA, DESCA[INB_], AnbD );
232  AnpD = PB_Cnumroc( N, 0, Ainb1D, AnbD, mycol, AsrcD, npcol );
233 /*
234 * If sub( A ) is either replicated or spans only one process row, no data needs
235 * to be exchanged by the processes, the operation is purely local.
236 */
237  if( !( PB_Cspan( M, IA, DESCA[IMB_], AnbR, AsrcR, nprow ) ) )
238  {
239  if( Mupcase( ALLOC[0] ) == CREUSE )
240  {
241 /*
242 * sub( A ) can be reused
243 */
244  if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
245  {
246 /*
247 * If I own some entries of sub( A ), set *B
248 */
249  Bld = Ald;
250  *B = Mptr( A, AiiR, AiiD, Ald, TYPE->size );
251  }
252  else { Bld = 1; }
253  }
254  else
255  {
256 /*
257 * sub( A ) cannot be reused, make a copy of it.
258 */
259  if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
260  {
261 /*
262 * If I own some entries of sub( A ), allocate space for the copy, and copy the
263 * data.
264 */
265  Bld = M;
266  if( AnpD > 0 )
267  {
268  size = TYPE->size;
269  *B = PB_Cmalloc( AnpD * M * size );
270  *BFREE = 1;
271  TYPE->Fmmadd( &M, &AnpD, TYPE->one, Mptr( A, AiiR, AiiD, Ald,
272  size ), &Ald, TYPE->zero, *B, &Bld );
273  }
274  }
275  else { Bld = 1; }
276  }
277 /*
278 * Describe the resulting operand
279 */
280  PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt,
281  Bld );
282  return;
283  }
284 
285  AnR = M; Bsrc_ = RSRC_;
286  AmyprocR = myrow; AmyprocD = mycol; AnprocsR = nprow;
287  Ainb1R = PB_Cfirstnb( M, IA, DESCA[IMB_], AnbR );
288  AnpR = PB_Cnumroc( M, 0, Ainb1R, AnbR, myrow, AsrcR, nprow );
289  }
290  else
291  {
292 /*
293 * Accumulate columns of sub( A )
294 */
295  AnbD = DESCA[MB_ ]; AnbR = DESCA[NB_ ]; Ald = DESCA[LLD_];
296  PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiD, &AiiR,
297  &AsrcD, &AsrcR );
298  Ainb1D = PB_Cfirstnb( M, IA, DESCA[IMB_], AnbD );
299  AnpD = PB_Cnumroc( M, 0, Ainb1D, AnbD, myrow, AsrcD, nprow );
300 /*
301 * If sub( A ) is either replicated or spans only one process column, no data
302 * needs to be exchanged by the processes, the operation is purely local.
303 */
304  if( !( PB_Cspan( N, JA, DESCA[INB_], AnbR, AsrcR, npcol ) ) )
305  {
306  if( Mupcase( ALLOC[0] ) == CREUSE )
307  {
308 /*
309 * sub( A ) can be reused
310 */
311  Bld = Ald;
312  if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
313 /*
314 * If I own some entries of sub( A ), set *B
315 */
316  *B = Mptr( A, AiiD, AiiR, Ald, TYPE->size );
317  }
318  else
319  {
320 /*
321 * sub( A ) cannot be reused, make a copy of it.
322 */
323  Bld = MAX( 1, AnpD );
324  if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
325  {
326 /*
327 * If I own some entries of sub( A ), allocate space for the copy, and copy the
328 * data.
329 */
330  if( AnpD > 0 )
331  {
332  size = TYPE->size;
333  *B = PB_Cmalloc( AnpD * N * size );
334  *BFREE = 1;
335  TYPE->Fmmadd( &AnpD, &N, TYPE->one, Mptr( A, AiiD, AiiR, Ald,
336  size ), &Ald, TYPE->zero, *B, &Bld );
337  }
338  }
339  }
340 /*
341 * Describe the resulting operand
342 */
343  PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt,
344  Bld );
345  return;
346  }
347 
348  AnR = N; Bsrc_ = CSRC_;
349  AmyprocR = mycol; AmyprocD = myrow; AnprocsR = npcol;
350  Ainb1R = PB_Cfirstnb( N, JA, DESCA[INB_], AnbR );
351  AnpR = PB_Cnumroc( N, 0, Ainb1R, AnbR, mycol, AsrcR, npcol );
352  }
353 /*
354 * sub( A ) is not replicated and spans more than one process row or column.
355 * Forward row (resp. column) accumulation will leave the resulting operand in
356 * the process(es) where the global row IA+M-1 (resp. global column JA+N-1)
357 * resides.
358 */
359  if( ( Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD ) ) != 0 )
360  {
361  if( ( AnpD > 0 ) && ( AnpR > 0 ) )
362  {
363 /*
364 * Compute how may rows or columns are before me -> AnpreR
365 */
366  AnpreR = PB_Cnpreroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR,
367  AnprocsR );
368 
369  if( AnpreR == 0 )
370  {
371 /*
372 * If zero rows or columns are before me, I must be the source, so send my piece
373 * to the process after me in the grid.
374 */
375  if( AggRow )
376  {
377  TYPE->Cgesd2d( ctxt, AnpR, AnpD, Mptr( A, AiiR, AiiD, Ald,
378  TYPE->size ), Ald, MModAdd1( AmyprocR, AnprocsR ),
379  AmyprocD );
380  }
381  else
382  {
383  TYPE->Cgesd2d( ctxt, AnpD, AnpR, Mptr( A, AiiD, AiiR,
384  Ald, TYPE->size ), Ald, AmyprocD,
385  MModAdd1( AmyprocR, AnprocsR ) );
386  }
387  }
388  else if( AnpreR > 0 )
389  {
390 /*
391 * Otherwise, allocate some space for the rows or columns I have and the ones
392 * globally preceeding the ones I have, that I am about to receive.
393 */
394  size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
395  add = TYPE->Fmmadd;
396  *B = Bptr = PB_Cmalloc( ( AnpreR + AnpR ) * AnpD * size );
397  nlen = AnpreR;
398  mydistnb = MModSub( AmyprocR, AsrcR, AnprocsR ) * AnbR;
399  kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ?
400  ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 );
401  offset = kblks * AnbR;
402  kn = Ainb1R + mydistnb - AnbR;
403  kn = MIN( kn, AnpreR ) +
404  ( MAX( 1, kblks ) - 1 ) * mydistnb;
405  if( AggRow )
406  {
407  shft = TYPE->Frshft;
408  Aptr = Mptr( A, AiiR, AiiD, Ald, size );
409  Bld = AnpreR + AnpR;
410 /*
411 * Receive the rows globally preceeding the ones I have
412 */
413  TYPE->Cgerv2d( ctxt, AnpreR, AnpD, *B, Bld, MModSub1( AmyprocR,
414  AnprocsR ), AmyprocD );
415 /*
416 * Sort the received buffer and insert at the correct place the rows of sub( A )
417 * I own (from bottom to top).
418 */
419  if( ( ( AnpR - 1 ) / AnbR ) == kblks )
420  {
421  kb = AnpR - offset;
422  add( &kb, &AnpD, one, Mptr( Aptr, offset, 0, Ald, size ),
423  &Ald, zero, Mptr( Bptr, nlen+offset, 0, Bld, size ),
424  &Bld );
425  }
426 
427  for( k = kblks; k >= 1; k-- )
428  {
429  kb = nlen - kn;
430  shft( &kb, &AnpD, &offset, Mptr( Bptr, kn, 0, Bld, size ),
431  &Bld );
432  offset -= AnbR;
433  add( &AnbR, &AnpD, one, Mptr( Aptr, offset, 0, Ald, size ),
434  &Ald, zero, Mptr( Bptr, kn+offset, 0, Bld, size ),
435  &Bld );
436  kn -= mydistnb;
437  nlen -= kb;
438  }
439 
440  if( AnpreR + AnpR != AnR )
441  {
442 /*
443 * If I am not the last process, i.e I am not supposed to own all of the AnR
444 * rows by the end of the operation, then send the sorted buffer to the next
445 * process and release the dynamically allocated buffer.
446 */
447  TYPE->Cgesd2d( ctxt, AnpreR+AnpR, AnpD, *B, Bld,
448  MModAdd1( AmyprocR, AnprocsR ), AmyprocD );
449  if( *B ) free( *B );
450  }
451  }
452  else
453  {
454  shft = TYPE->Fcshft;
455  Aptr = Mptr( A, AiiD, AiiR, Ald, size );
456  Bld = MAX( 1, AnpD );
457 /*
458 * Receive the columns globally preceeding the ones I have
459 */
460  TYPE->Cgerv2d( ctxt, AnpD, AnpreR, *B, Bld, AmyprocD,
461  MModSub1( AmyprocR, AnprocsR ) );
462 /*
463 * Sort the received buffer and insert at the correct place the columns of
464 * sub( A ) I own (from right to left).
465 */
466  if( ( ( AnpR - 1 ) / AnbR ) == kblks )
467  {
468  kb = AnpR - offset;
469  add( &AnpD, &kb, one, Mptr( Aptr, 0, offset, Ald, size ),
470  &Ald, zero, Mptr( Bptr, 0, nlen+offset, Bld, size ),
471  &Bld );
472  }
473 
474  for( k = kblks; k >= 1; k-- )
475  {
476  kb = nlen - kn;
477  shft( &AnpD, &kb, &offset, Mptr( Bptr, 0, kn, Bld, size ),
478  &Bld );
479  offset -= AnbR;
480  add( &AnpD, &AnbR, one, Mptr( Aptr, 0, offset, Ald, size ),
481  &Ald, zero, Mptr( Bptr, 0, kn + offset, Bld, size ),
482  &Bld );
483  kn -= mydistnb;
484  nlen -= kb;
485  }
486 
487  if( AnpreR + AnpR != AnR )
488  {
489 /*
490 * If I am not the last process, i.e I am not supposed to own all of the AnR
491 * columns by the end of the operation, then send the sorted buffer to the next
492 * process and release the dynamically allocated buffer.
493 */
494  TYPE->Cgesd2d( ctxt, AnpD, AnpreR+AnpR, *B, Bld, AmyprocD,
495  MModAdd1( AmyprocR, AnprocsR ) );
496  if( *B ) free( *B );
497  }
498  }
499  }
500  }
501  }
502  else
503  {
504 /*
505 * Backward accumulation, compute the process row or column coordinate ArocR,
506 * that is going to have the resulting operand.
507 */
508  ArocR = PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR, AnprocsR );
509 
510  if( ( AnpD > 0 ) && ( AnpR > 0 ) )
511  {
512 /*
513 * Compute how may rows or columns are after me -> AnnxtR
514 */
515  AnnxtR = PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR,
516  AnprocsR );
517  AnnxtL = PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, ArocR, AsrcR,
518  AnprocsR );
519 
520  if( ( AnnxtR = MModSub( AnnxtR, AnnxtL, AnR ) ) == 0 )
521  {
522 /*
523 * If zero rows or columns are after me, I must be the source, so send my piece
524 * to the process before me in the grid.
525 */
526  if( AggRow )
527  {
528  TYPE->Cgesd2d( ctxt, AnpR, AnpD, Mptr( A, AiiR, AiiD, Ald,
529  TYPE->size ), Ald, MModSub1( AmyprocR, AnprocsR ),
530  AmyprocD );
531  }
532  else
533  {
534  TYPE->Cgesd2d( ctxt, AnpD, AnpR, Mptr( A, AiiD, AiiR, Ald,
535  TYPE->size ), Ald, AmyprocD, MModSub1( AmyprocR,
536  AnprocsR ) );
537  }
538  }
539  else if( AnnxtR > 0 )
540  {
541 /*
542 * Otherwise, allocate some space for the rows or columns I have and the ones
543 * globally following the ones I have, that I am about to receive.
544 */
545  size = TYPE->size; one = TYPE->one; zero = TYPE->zero;
546  add = TYPE->Fmmadd;
547  *B = Bptr = PB_Cmalloc( ( AnnxtR + AnpR ) * AnpD * size );
548  kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ?
549  ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 );
550  mydist = MModSub( ArocR, AmyprocR, AnprocsR );
551  mydistnb = mydist * AnbR;
552  srcdist = MModSub( ArocR, AsrcR, AnprocsR );
553 
554  if( AggRow )
555  {
556  shft = TYPE->Frshft;
557  Aptr = Mptr( A, AiiR, AiiD, Ald, size );
558  Bld = AnnxtR + AnpR;
559 /*
560 * Receive the rows globally following the ones I have
561 */
562  TYPE->Cgerv2d( ctxt, AnnxtR, AnpD, Mptr( *B, AnpR, 0, Bld,
563  size ), Bld, MModAdd1( AmyprocR, AnprocsR ),
564  AmyprocD );
565 /*
566 * Sort the received buffer and insert at the correct place the rows of sub( A )
567 * I own (from top to bottom).
568 */
569  if( mydist > srcdist )
570  {
571  offset = -AnpR;
572  kb = Ainb1R + srcdist*AnbR;
573  }
574  else if( mydist == srcdist )
575  {
576  add( &Ainb1R, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
577  Aptr = Mptr( Aptr, Ainb1R, 0, Ald, size );
578  Bptr = Mptr( Bptr, Ainb1R, 0, Ald, size );
579  offset = Ainb1R - AnpR;
580  kb = mydistnb;
581  }
582  else
583  {
584  add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
585  Aptr = Mptr( Aptr, AnbR, 0, Ald, size );
586  Bptr = Mptr( Bptr, AnbR, 0, Ald, size );
587  offset = AnbR - AnpR;
588  kb = mydistnb;
589  }
590 
591  for( k = kblks; k >= 1; k-- )
592  {
593  shft( &kb, &AnpD, &offset, Bptr, &Bld );
594  Bptr = Mptr( Bptr, kb, 0, Bld, size );
595  add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
596  Aptr = Mptr( Aptr, AnbR, 0, Ald, size );
597  Bptr = Mptr( Bptr, AnbR, 0, Ald, size );
598  offset += AnbR;
599  kb = mydistnb;
600  }
601 
602  if( AnnxtR + AnpR != AnR )
603  {
604 /*
605 * If I am not the last process, i.e I am not supposed to own all of the AnR
606 * rows by the end of the operation, then send the sorted buffer to the previous
607 * process and release the dynamically allocated buffer.
608 */
609  TYPE->Cgesd2d( ctxt, AnnxtR+AnpR, AnpD, *B, Bld,
610  MModSub1( AmyprocR, AnprocsR ), AmyprocD );
611  if( *B ) free( *B );
612  }
613  }
614  else
615  {
616  shft = TYPE->Fcshft;
617  Aptr = Mptr( A, AiiD, AiiR, Ald, size );
618  Bld = MAX( 1, AnpD );
619 /*
620 * Receive the columns globally following the ones I have
621 */
622  TYPE->Cgerv2d( ctxt, AnpD, AnnxtR, Mptr( *B, 0, AnpR, Bld,
623  size ), Bld, AmyprocD, MModAdd1( AmyprocR,
624  AnprocsR ) );
625 /*
626 * Sort the received buffer and insert at the correct place the columns of
627 * sub( A ) I own (from left to right).
628 */
629  if( mydist > srcdist )
630  {
631  offset = -AnpR;
632  kb = Ainb1R + srcdist*AnbR;
633  }
634  else if( mydist == srcdist )
635  {
636  add( &AnpD, &Ainb1R, one, Aptr, &Ald, zero, Bptr, &Bld );
637  Aptr = Mptr( Aptr, 0, Ainb1R, Ald, size );
638  Bptr = Mptr( Bptr, 0, Ainb1R, Bld, size );
639  offset = Ainb1R - AnpR;
640  kb = mydistnb;
641  }
642  else
643  {
644  add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld );
645  Aptr = Mptr( Aptr, 0, AnbR, Ald, size );
646  Bptr = Mptr( Bptr, 0, AnbR, Bld, size );
647  offset = AnbR - AnpR;
648  kb = mydistnb;
649  }
650 
651  for( k = kblks; k >= 1; k-- )
652  {
653  shft( &AnpD, &kb, &offset, Bptr, &Bld );
654  Bptr = Mptr( Bptr, 0, kb, Bld, size );
655  add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld );
656  Aptr = Mptr( Aptr, 0, AnbR, Ald, size );
657  Bptr = Mptr( Bptr, 0, AnbR, Bld, size );
658  offset += AnbR;
659  kb = mydistnb;
660  }
661 
662  if( AnnxtR + AnpR != AnR )
663  {
664 /*
665 * If I am not the last process, i.e I am not supposed to own all of the AnR
666 * columns by the end of the operation, then send the sorted buffer to the
667 * previous process and release the dynamically allocated buffer.
668 */
669  TYPE->Cgesd2d( ctxt, AnpD, AnnxtR+AnpR, *B, Bld, AmyprocD,
670  MModSub1( AmyprocR, AnprocsR ) );
671  if( *B ) free( *B );
672  }
673  }
674  }
675  }
676  }
677 /*
678 * Describe the resulting operand
679 */
680  if( AggRow )
681  {
682  PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt, M );
683  }
684  else
685  {
686  PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt,
687  MAX( 1, AnpD ) );
688  }
689 /*
690 * Compute globally in which process row or column the resulting operand is
691 * residing and set *BFREE accordingly.
692 */
693  if( Afwd )
694  {
695  if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR )
696  {
697 /*
698 * If sub( A ) is spanning all process rows or columns of the grid, the result
699 * must be in the process row or column preceeding the one owning IA or JA,
700 * don't you think ?
701 */
702  DESCB[Bsrc_] = MModSub1( AsrcR, AnprocsR );
703  }
704  else
705  {
706 /*
707 * Otherwise, the result is in the process row or column where the row IA+M-1
708 * or column JA+N-1 of sub( A ) resides.
709 */
710  DESCB[Bsrc_] = PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR,
711  AnprocsR );
712  }
713  if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) )
714  *BFREE = 1;
715  }
716  else
717  {
718  if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR )
719  {
720 /*
721 * If sub( A ) is spanning all process rows or columns of the grid, the result
722 * must be in the process row or column following the one owning IA+M-1 or
723 * JA+N-1, don't you think ?
724 */
725  DESCB[Bsrc_] = MModAdd1( ArocR, AnprocsR );
726  }
727  else
728  {
729 /*
730 * Otherwise, the result is in the process row or column where the row IA or
731 * column JA of sub( A ) resides.
732 */
733  DESCB[Bsrc_] = AsrcR;
734  }
735  if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) )
736  *BFREE = 1;
737  }
738 /*
739 * End of PB_CGatherV
740 */
741 }
TYPE
#define TYPE
Definition: clamov.c:7
MB_
#define MB_
Definition: PBtools.h:43
CREUSE
#define CREUSE
Definition: PBblas.h:41
NB_
#define NB_
Definition: PBtools.h:44
CSRC_
#define CSRC_
Definition: PBtools.h:46
PB_Cnpreroc
int PB_Cnpreroc()
PB_Cfirstnb
int PB_Cfirstnb()
LLD_
#define LLD_
Definition: PBtools.h:47
PB_Cnnxtroc
int PB_Cnnxtroc()
CROW
#define CROW
Definition: PBblacs.h:21
IMB_
#define IMB_
Definition: PBtools.h:41
MModSub
#define MModSub(I1, I2, d)
Definition: PBtools.h:102
PB_Cdescset
void PB_Cdescset()
MModAdd1
#define MModAdd1(I, d)
Definition: PBtools.h:100
MMADD_T
F_VOID_FCT(* MMADD_T)()
Definition: pblas.h:284
PB_Cindxg2p
int PB_Cindxg2p()
RSRC_
#define RSRC_
Definition: PBtools.h:45
PB_Cinfog2l
void PB_Cinfog2l()
PB_Cnumroc
int PB_Cnumroc()
PB_Cmalloc
char * PB_Cmalloc()
CFORWARD
#define CFORWARD
Definition: PBblas.h:38
MIN
#define MIN(a_, b_)
Definition: PBtools.h:76
INB_
#define INB_
Definition: PBtools.h:42
PB_Cspan
int PB_Cspan()
MModSub1
#define MModSub1(I, d)
Definition: PBtools.h:105
MAX
#define MAX(a_, b_)
Definition: PBtools.h:77
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mupcase
#define Mupcase(C)
Definition: PBtools.h:83
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
MMSHFT_T
F_VOID_FCT(* MMSHFT_T)()
Definition: pblas.h:285
CTXT_
#define CTXT_
Definition: PBtools.h:38
PB_CGatherV
void PB_CGatherV(PBTYP_T *TYPE, char *ALLOC, char *DIRECA, int M, int N, char *A, int IA, int JA, int *DESCA, char *AROC, char **B, int *DESCB, int *BFREE)
Definition: PB_CGatherV.c:24