SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CScatterV.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CScatterV( PBTYP_T * TYPE, char * DIRECA, Int M, Int N,
21 char * A, Int IA, Int JA, Int * DESCA, char * AROC,
22 char * ALPHA, char * B, Int IB, Int JB, Int * DESCB,
23 char * BROC )
24#else
25void PB_CScatterV( TYPE, DIRECA, M, N, A, IA, JA, DESCA, AROC,
26 ALPHA, B, IB, JB, DESCB, BROC )
27/*
28* .. Scalar Arguments ..
29*/
30 char * ALPHA, * AROC, * BROC, * DIRECA;
31 Int IA, IB, JA, JB, M, N;
32 PBTYP_T * TYPE;
33/*
34* .. Array Arguments ..
35*/
36 Int * DESCA, * DESCB;
37 char * A, * B;
38#endif
39{
40/*
41* Purpose
42* =======
43*
44* PB_CScatterV disaggregates the one-dimensional submatrix sub( A ) de-
45* noting A( IA:IA+M-1, JA:JA+N-1 ) into a two-dimensional submatrix
46* sub( B ) denoting B( IB:IB+M-1, JB:JB+N-1 ) when AROC is equal to
47* BROC and B( IB:IB+N-1, JB:JB+M-1 ) otherwise:
48*
49* sub( B ) := alpha * sub( B ) + sub( A ).
50*
51* Notes
52* =====
53*
54* A description vector is associated with each 2D block-cyclicly dis-
55* tributed matrix. This vector stores the information required to
56* establish the mapping between a matrix entry and its corresponding
57* process and memory location.
58*
59* In the following comments, the character _ should be read as
60* "of the distributed matrix". Let A be a generic term for any 2D
61* block cyclicly distributed matrix. Its description vector is DESC_A:
62*
63* NOTATION STORED IN EXPLANATION
64* ---------------- --------------- ------------------------------------
65* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
66* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
67* the NPROW x NPCOL BLACS process grid
68* A is distributed over. The context
69* itself is global, but the handle
70* (the integer value) may vary.
71* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
72* ted matrix A, M_A >= 0.
73* N_A (global) DESCA[ N_ ] The number of columns in the distri-
74* buted matrix A, N_A >= 0.
75* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
76* block of the matrix A, IMB_A > 0.
77* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
78* left block of the matrix A,
79* INB_A > 0.
80* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
81* bute the last M_A-IMB_A rows of A,
82* MB_A > 0.
83* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
84* bute the last N_A-INB_A columns of
85* A, NB_A > 0.
86* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
87* row of the matrix A is distributed,
88* NPROW > RSRC_A >= 0.
89* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
90* first column of A is distributed.
91* NPCOL > CSRC_A >= 0.
92* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
93* array storing the local blocks of
94* the distributed matrix A,
95* IF( Lc( 1, N_A ) > 0 )
96* LLD_A >= MAX( 1, Lr( 1, M_A ) )
97* ELSE
98* LLD_A >= 1.
99*
100* Let K be the number of rows of a matrix A starting at the global in-
101* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
102* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
103* receive if these K rows were distributed over NPROW processes. If K
104* is the number of columns of a matrix A starting at the global index
105* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
106* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
107* these K columns were distributed over NPCOL processes.
108*
109* The values of Lr() and Lc() may be determined via a call to the func-
110* tion PB_Cnumroc:
111* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
112* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
113*
114* Arguments
115* =========
116*
117* TYPE (local input) pointer to a PBTYP_T structure
118* On entry, TYPE is a pointer to a structure of type PBTYP_T,
119* that contains type information (See pblas.h).
120*
121* DIRECA (global input) pointer to CHAR
122* On entry, DIRECA specifies the direction in which the rows
123* or columns of sub( A ) should be disaggregated as follows:
124* DIRECA = 'F' or 'f' forward or increasing,
125* DIRECA = 'B' or 'b' backward or decreasing.
126*
127* M (global input) INTEGER
128* On entry, M specifies the number of rows of the submatrix
129* sub( A ). M must be at least zero.
130*
131* N (global input) INTEGER
132* On entry, N specifies the number of columns of the submatrix
133* sub( A ). N must be at least zero.
134*
135* A (local input) pointer to CHAR
136* On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
137* is DESCA[LLD_], i.e. at least MAX( 1, Lr( M, IA ) ), and,
138* Ka is at least Lc( N, JA ). Before entry, this array contains
139* the local entries of the matrix A.
140*
141* IA (global input) INTEGER
142* On entry, IA specifies A's global row index, which points to
143* the beginning of the submatrix sub( A ).
144*
145* JA (global input) INTEGER
146* On entry, JA specifies A's global column index, which points
147* to the beginning of the submatrix sub( A ).
148*
149* DESCA (global and local input) INTEGER array
150* On entry, DESCA is an integer array of dimension DLEN_. This
151* is the array descriptor for the matrix A.
152*
153* AROC (global input) pointer to CHAR
154* On entry, AROC specifies the orientation of the submatrix
155* sub( A ). When AROC is 'R' or 'r', sub( A ) is a row matrix,
156* and a column matrix otherwise.
157*
158* ALPHA (local input) pointer to CHAR
159* On entry, ALPHA specifies the scalar alpha.
160*
161* B (local output) pointer to CHAR
162* On entry, A is an array of dimension (LLD_B, Kb), where LLD_B
163* is DESCB[LLD_], i.e. at least MAX( 1, Lr( M, IB ) ) when AROC
164* and BROC are equal, and MAX( 1, Lr( N, IB ) ) otherwise, and,
165* Kb is at least Lc( N, JB ) when AROC and BROC are equal, and
166* Lc( M, JB ) otherwise. On exit, this array contains the local
167* entries of the disaggregated submatrix sub( A ).
168*
169* IB (global input) INTEGER
170* On entry, IB specifies B's global row index, which points to
171* the beginning of the submatrix sub( B ).
172*
173* JB (global input) INTEGER
174* On entry, JB specifies B's global column index, which points
175* to the beginning of the submatrix sub( B ).
176*
177* DESCB (global and local input) INTEGER array
178* On entry, DESCB is an integer array of dimension DLEN_. This
179* is the array descriptor for the matrix B.
180*
181* BROC (global input) pointer to CHAR
182* On entry, BROC specifies the orientation of the submatrix
183* sub( B ). When BROC is 'R' or 'r', sub( B ) is a row matrix,
184* and a column matrix otherwise.
185*
186* -- Written on April 1, 1998 by
187* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
188*
189* ---------------------------------------------------------------------
190*/
191/*
192* .. Local Scalars ..
193*/
194 char * one;
195 Int Afwd, Bbufld, Bcol, Bcurcol, Bcurrow, Bii, Bimb, Bimb1, Binb,
196 Binb1, BisRow, Bjj, Bld, Bm, Bmb, Bmp, Bn, Bnb, Bnnxt, BnnxtL,
197 Bnpre, Bnq, Brow, WAfr, ctxt, kb, mycol, mydist, mydistnb,
198 myrow, nlen, npcol, nprow, offset, size, srcdist, stride,
199 stridenb, tmp;
200 MMADD_T add;
201 MMSHFT_T shft;
202/*
203* .. Local Arrays ..
204*/
205 Int Bd0[DLEN_], WAd[DLEN_];
206 char * Bptr = NULL, * Bbuf = NULL, * Bbufptr = NULL, * WA = NULL;
207/* ..
208* .. Executable Statements ..
209*
210*/
211/*
212* Quick return if possible
213*/
214 if( ( M <= 0 ) || ( N <= 0 ) ) return;
215/*
216* Retrieve process grid information
217*/
218 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
219
220 Afwd = ( Mupcase( DIRECA[0] ) == CFORWARD );
221 BisRow = ( Mupcase( BROC [0] ) == CROW );
222 if( Mupcase( AROC[0] ) == Mupcase( BROC[0] ) ) { Bm = M; Bn = N; }
223 else { Bm = N; Bn = M; }
224/*
225* Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ...
226*/
227 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
228 &Brow, &Bcol );
229 Bimb = DESCB[IMB_]; Binb = DESCB[INB_]; Bmb = DESCB[MB_]; Bnb = DESCB[NB_];
230 Bimb1 = PB_Cfirstnb( Bm, IB, Bimb, Bmb );
231 Bmp = PB_Cnumroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
232 Binb1 = PB_Cfirstnb( Bn, JB, Binb, Bnb );
233 Bnq = PB_Cnumroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
234 Bld = DESCB[LLD_]; size = TYPE->size; one = TYPE->one;
235 if( ( Bmp > 0 ) && ( Bnq > 0 ) ) Bptr = Mptr( B, Bii, Bjj, Bld, size );
236
237 if( BisRow )
238 {
239/*
240* Compute descriptor Bd0 for sub( B ).
241*/
242 if( Afwd ) { Bcurrow = Brow; }
243 else { Bcurrow = PB_Cindxg2p( Bm-1, Bimb1, Bmb, Brow, Brow, nprow ); }
244 PB_Cdescset( Bd0, Bm, Bn, Bm, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld );
245/*
246* Align sub( A ) with sub( B )
247*/
248 PB_CInV( TYPE, NOCONJG, BROC, Bm, Bn, Bd0, Bm, A, IA, JA, DESCA, AROC,
249 &WA, WAd, &WAfr );
250/*
251* Disaggregate WA = sub( A )
252*/
253 if( ( Brow == -1 ) || ( nprow == 1 ) )
254 {
255/*
256* sub( B ) is replicated
257*/
258 if( Bnq > 0 )
259 TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
260 if( WAfr ) free( WA );
261 return;
262 }
263
264 if( !( PB_Cspan( Bm, 0, Bimb1, Bmb, Brow, nprow ) ) )
265 {
266/*
267* sub( B ) spans only one process row
268*/
269 if( ( myrow == Brow ) && ( Bnq > 0 ) )
270 TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
271 if( WAfr ) free( WA );
272 return;
273 }
274/*
275* sub( B ) spans more than one process row
276*/
277 if( Afwd )
278 {
279/*
280* sub( B ) is not replicated and spans more than one process row. Forward row
281* dissagregation starts in the process row where the global row IB resides.
282*/
283 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
284 {
285/*
286* Compute how may rows are before and after me (Bnpre and Bnnxt).
287*/
288 Bnpre = PB_Cnpreroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
289 Bnnxt = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
290 nlen = Bmp + Bnnxt;
291
292 if( Bnpre > 0 )
293 {
294/*
295* If I don't own the row IB, then allocate and receive a buffer of length
296* ( Bmp + Bnnxt ) * Bnq from the previous process row.
297*/
298 Bbufptr = Bbuf = PB_Cmalloc( nlen * Bnq * size );
299 Bbufld = nlen;
300 TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld, MModSub1( myrow,
301 nprow ), mycol );
302 kb = Bmb;
303 }
304 else
305 {
306/*
307* Otherwise, reuse WA.
308*/
309 Bbufptr = Bbuf = WA;
310 Bbufld = WAd[LLD_];
311 kb = Bimb1;
312 }
313/*
314* Unpack the received data
315*/
316 if( Bnnxt > 0 )
317 {
318/*
319* If some rows reside in the process row following mine, then unpack my piece,
320* sort the buffer and send those Bnnxt rows to the next process row.
321*/
322 add = TYPE->Fmmadd; shft = TYPE->Frshft;
323 mydistnb = ( nprow - MModSub( myrow, Brow, nprow ) - 1 );
324 stride = ( mydistnb *= Bmb ) * size;
325
326 do
327 {
328 kb = MIN( kb, nlen );
329 add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
330 nlen -= kb;
331 offset = -kb;
332 shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld );
333 Bptr += kb*size;
334 Bbufptr += stride;
335 nlen -= mydistnb;
336 kb = Bmb;
337 } while( nlen > 0 );
338/*
339* send buffer of length Bnnxt * Bnq to the next process row.
340*/
341 TYPE->Cgesd2d( ctxt, Bnnxt, Bnq, Bbuf, Bbufld, MModAdd1( myrow,
342 nprow ), mycol );
343 }
344 else
345 {
346/*
347* Otherwise, I must be the last process involved in the operation, so no
348* unpacking is necessary.
349*/
350 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
351 &Bld );
352 }
353/*
354* If I don't own the row IB, then release the dynamically allocated buffer.
355*/
356 if( Bnpre > 0 ) free( Bbuf );
357 }
358 if( WAfr ) free( WA );
359 }
360 else
361 {
362 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
363 {
364/*
365* Compute how may rows are before and after me (Bnpre, Bnnxt).
366*/
367 Bnnxt = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
368 BnnxtL = PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, Bcurrow, Brow, nprow );
369 Bnnxt = MModSub( Bnnxt, BnnxtL, Bm );
370 Bnpre = ( nlen = Bm - Bnnxt ) - Bmp;
371
372 if( Bnnxt > 0 )
373 {
374/*
375* If I don't own the row IB+M-1, then allocate and receive a buffer of length
376* ( Bm - Bnnxt ) * Bnq from the next process row.
377*/
378 Bbufptr = Bbuf = PB_Cmalloc( nlen * Bnq * size );
379 Bbufld = nlen;
380 TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld, MModAdd1( myrow,
381 nprow ), mycol );
382 }
383 else
384 {
385/*
386* Otherwise, reuse WA.
387*/
388 Bbufptr = Bbuf = WA;
389 Bbufld = WAd[LLD_];
390 }
391/*
392* Unpack the received data
393*/
394 if( Bnpre > 0 )
395 {
396/*
397* If some rows reside in the process row preceeding mine, then unpack my piece,
398* sort the buffer and send those Bnpre rows to the previous process row.
399*/
400 add = TYPE->Fmmadd; shft = TYPE->Frshft;
401 mydist = MModSub( Bcurrow, myrow, nprow );
402 srcdist = MModSub( Bcurrow, Brow, nprow );
403 stridenb = ( nprow - mydist - 1 ) * Bmb;
404
405 if( mydist < srcdist )
406 {
407 tmp = ( Bimb1 + ( srcdist - mydist - 1 ) * Bmb );
408 Bbufptr += tmp * size;
409 nlen -= tmp;
410 kb = Bmb;
411 }
412 else if( mydist == srcdist )
413 {
414 kb = Bimb1;
415 }
416 else
417 {
418 Bbufptr += stridenb * size;
419 nlen -= stridenb;
420 kb = Bmb;
421 }
422
423 do
424 {
425 kb = MIN( kb, nlen );
426 add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
427 nlen -= kb;
428 offset = -kb;
429 shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld );
430 Bptr += kb*size;
431 Bbufptr += stridenb*size;
432 nlen -= stridenb;
433 kb = Bmb;
434 } while( nlen > 0 );
435/*
436* send buffer of length Bnpre * Bnq to the previous process row.
437*/
438 TYPE->Cgesd2d( ctxt, Bnpre, Bnq, Bbuf, Bbufld, MModSub1( myrow,
439 nprow ), mycol );
440 }
441 else
442 {
443/*
444* Otherwise, I must be the last process involved in the operation, so no
445* unpacking is necessary.
446*/
447 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
448 &Bld );
449 }
450/*
451* If I don't own the row IB+M-1, then release the dynamically allocated buffer.
452*/
453 if( Bnnxt > 0 ) free( Bbuf );
454 }
455 if( WAfr ) free( WA );
456 }
457 }
458 else
459 {
460/*
461* Compute descriptor Bd0 for sub( B ).
462*/
463 if( Afwd ) { Bcurcol = Bcol; }
464 else { Bcurcol = PB_Cindxg2p( Bn-1, Binb1, Bnb, Bcol, Bcol, npcol ); }
465 PB_Cdescset( Bd0, Bm, Bn, Bimb1, Bn, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld );
466/*
467* Align sub( A ) with sub( B )
468*/
469 PB_CInV( TYPE, NOCONJG, BROC, Bm, Bn, Bd0, Bn, A, IA, JA, DESCA, AROC,
470 &WA, WAd, &WAfr );
471/*
472* Disaggregate WA = sub( A )
473*/
474 if( ( Bcol == -1 ) || ( npcol == 1 ) )
475 {
476/*
477* sub( B ) is replicated
478*/
479 if( Bmp > 0 )
480 TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
481 if( WAfr ) free( WA );
482 return;
483 }
484
485 if( !( PB_Cspan( Bn, 0, Binb1, Bnb, Bcol, npcol ) ) )
486 {
487/*
488* sub( B ) spans only one process column
489*/
490 if( ( mycol == Bcol ) && ( Bmp > 0 ) )
491 TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[LLD_], ALPHA, Bptr, &Bld );
492 if( WAfr ) free( WA );
493 return;
494 }
495/*
496* sub( B ) spans more than one process column
497*/
498 if( Afwd )
499 {
500/*
501* sub( B ) is not replicated and spans more than one process column. Forward
502* column dissagregation starts in the process column where the global column
503* JB resides.
504*/
505 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
506 {
507/*
508* Compute how may columns are before and after me (Bnpre and Bnnxt).
509*/
510 Bnpre = PB_Cnpreroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
511 Bnnxt = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
512 nlen = Bnq + Bnnxt;
513
514 if( Bnpre > 0 )
515 {
516/*
517* If I don't own the column JB, then allocate and receive a buffer of length
518* Bmp * ( Bnq + Bnnxt ) from the previous process column.
519*/
520 Bbufptr = Bbuf = PB_Cmalloc( Bmp * nlen * size );
521 Bbufld = Bmp;
522 TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow,
523 MModSub1( mycol, npcol ) );
524 kb = Bnb;
525 }
526 else
527 {
528/*
529* Otherwise, reuse WA.
530*/
531 Bbufptr = Bbuf = WA;
532 Bbufld = WAd[LLD_];
533 kb = Binb1;
534 }
535/*
536* Unpack the received data
537*/
538 if( Bnnxt > 0 )
539 {
540/*
541* If some columns reside in the process column following mine, then unpack my
542* piece, sort the buffer and send those Bnnxt columns to the next process
543* column.
544*/
545 add = TYPE->Fmmadd; shft = TYPE->Fcshft;
546 mydistnb = ( npcol - MModSub( mycol, Bcol, npcol ) - 1 );
547 stride = ( mydistnb *= Bnb ) * Bbufld * size;
548
549 do
550 {
551 kb = MIN( kb, nlen );
552 add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
553 nlen -= kb;
554 offset = -kb;
555 shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld );
556 Bptr += kb*Bld*size;
557 Bbufptr += stride;
558 nlen -= mydistnb;
559 kb = Bnb;
560 } while( nlen > 0 );
561/*
562* send buffer of length Bmp * Bnnxt to the next process column.
563*/
564 TYPE->Cgesd2d( ctxt, Bmp, Bnnxt, Bbuf, Bbufld, myrow,
565 MModAdd1( mycol, npcol ) );
566 }
567 else
568 {
569/*
570* Otherwise, I must be the last process involved in the operation, so no
571* unpacking is necessary.
572*/
573 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
574 &Bld );
575 }
576/*
577* If I don't own the column JB, then release the dynamically allocated buffer.
578*/
579 if( Bnpre > 0 ) free( Bbuf );
580 }
581 if( WAfr ) free( WA );
582 }
583 else
584 {
585 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
586 {
587/*
588* Compute how may rows are before and after me (Bnpre, Bnnxt).
589*/
590 Bnnxt = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
591 BnnxtL = PB_Cnnxtroc( Bn, 0, Binb1, Bnb, Bcurcol, Bcol, npcol );
592 Bnnxt = MModSub( Bnnxt, BnnxtL, Bn );
593 Bnpre = ( nlen = Bn - Bnnxt ) - Bnq;
594
595 if( Bnnxt > 0 )
596 {
597/*
598* If I don't own the column JB+N-1, then allocate and receive a buffer of
599* length Bmp * ( Bn - Bnnxt ) from the next process column.
600*/
601 Bbufptr = Bbuf = PB_Cmalloc( Bmp * nlen * size );
602 Bbufld = Bmp;
603 TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow,
604 MModAdd1( mycol, npcol ) );
605 }
606 else
607 {
608/*
609* Otherwise, reuse WA.
610*/
611 Bbufptr = Bbuf = WA;
612 Bbufld = WAd[LLD_];
613 }
614/*
615* Unpack the received data
616*/
617 if( Bnpre > 0 )
618 {
619/*
620* If some columns reside in the process column preceeding mine, then unpack my
621* piece, sort the buffer and send those Bnpre columns to the previous process
622* column.
623*/
624 add = TYPE->Fmmadd; shft = TYPE->Fcshft;
625 mydist = MModSub( Bcurcol, mycol, npcol );
626 srcdist = MModSub( Bcurcol, Bcol, npcol );
627 stridenb = ( npcol - mydist - 1 ) * Bnb;
628
629 if( mydist < srcdist )
630 {
631 tmp = ( Binb1 + ( srcdist - mydist - 1 ) * Bnb );
632 Bbufptr += tmp * Bbufld * size;
633 nlen -= tmp;
634 kb = Bnb;
635 }
636 else if( mydist == srcdist )
637 {
638 kb = Binb1;
639 }
640 else
641 {
642 Bbufptr += stridenb * Bbufld * size;
643 nlen -= stridenb;
644 kb = Bnb;
645 }
646
647 do
648 {
649 kb = MIN( kb, nlen );
650 add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
651 nlen -= kb;
652 offset = -kb;
653 shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld );
654 Bptr += kb * Bld * size;
655 Bbufptr += stridenb * Bbufld * size;
656 nlen -= stridenb;
657 kb = Bnb;
658 } while( nlen > 0 );
659/*
660* send buffer of length Bmp * Bnpre to the previous process column.
661*/
662 TYPE->Cgesd2d( ctxt, Bmp, Bnpre, Bbuf, Bbufld, myrow,
663 MModSub1( mycol, npcol ) );
664 }
665 else
666 {
667/*
668* Otherwise, I must be the last process involved in the operation, so no
669* unpacking is necessary.
670*/
671 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
672 &Bld );
673 }
674/*
675* If I don't own the column JB+N-1, then release the dynamically allocated
676* buffer.
677*/
678 if( Bnnxt > 0 ) free( Bbuf );
679 }
680 if( WAfr ) free( WA );
681 }
682 }
683/*
684* End of PB_CScatterV
685*/
686}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMSHFT_T)()
Definition pblas.h:289
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define CROW
Definition PBblacs.h:21
void Cblacs_gridinfo()
#define NOCONJG
Definition PBblas.h:45
#define CFORWARD
Definition PBblas.h:38
#define CTXT_
Definition PBtools.h:38
Int PB_Cfirstnb()
#define MB_
Definition PBtools.h:43
char * PB_Cmalloc()
void PB_Cinfog2l()
#define MModSub(I1, I2, d)
Definition PBtools.h:102
#define MIN(a_, b_)
Definition PBtools.h:76
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
Int PB_Cnnxtroc()
#define LLD_
Definition PBtools.h:47
Int PB_Cnumroc()
void PB_CInV()
void PB_CScatterV()
void PB_Cdescset()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define INB_
Definition PBtools.h:42
#define MModSub1(I, d)
Definition PBtools.h:105
#define IMB_
Definition PBtools.h:41
Int PB_Cindxg2p()
#define Mupcase(C)
Definition PBtools.h:83
#define DLEN_
Definition PBtools.h:48
#define NB_
Definition PBtools.h:44
Int PB_Cnpreroc()
Int PB_Cspan()
#define TYPE
Definition clamov.c:7