14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * A,
int IA,
int JA,
int * DESCA,
char * AROC,
22 char * * B,
int * DESCB,
int * BFREE )
24 void PB_CGatherV(
TYPE, ALLOC, DIRECA, M, N, A, IA, JA, DESCA, AROC, B,
29 char * ALLOC, * AROC, * DIRECA;
30 int * BFREE, IA, JA, M, N;
189 int Afwd, AggRow, AiiD, AiiR, Ainb1D, Ainb1R, Ald, AmyprocD,
190 AmyprocR, AnR, AnbD, AnbR, AnnxtL, AnnxtR, AnpD, AnpR, AnpreR,
191 AnprocsR, ArocR, AsrcD, AsrcR, Bld, Bsrc_, ctxt, k, kb, kblks,
192 kn, ktmp, mycol, mydist, mydistnb, myrow, nlen, npcol, nprow,
193 offset, size, srcdist;
199 char * Aptr = NULL, * Bptr = NULL;
212 if( ( M <= 0 ) || ( N <= 0 ) )
223 if( ( AggRow = (
Mupcase( AROC[0] ) ==
CROW ) ) != 0 )
228 AnbR = DESCA[
MB_]; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
229 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiR, &AiiD,
232 AnpD =
PB_Cnumroc( N, 0, Ainb1D, AnbD, mycol, AsrcD, npcol );
237 if( !(
PB_Cspan( M, IA, DESCA[
IMB_], AnbR, AsrcR, nprow ) ) )
244 if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
250 *B =
Mptr( A, AiiR, AiiD, Ald,
TYPE->size );
259 if( ( ( myrow == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
271 TYPE->Fmmadd( &M, &AnpD,
TYPE->one,
Mptr( A, AiiR, AiiD, Ald,
272 size ), &Ald,
TYPE->zero, *B, &Bld );
280 PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt,
285 AnR = M; Bsrc_ =
RSRC_;
286 AmyprocR = myrow; AmyprocD = mycol; AnprocsR = nprow;
288 AnpR =
PB_Cnumroc( M, 0, Ainb1R, AnbR, myrow, AsrcR, nprow );
295 AnbD = DESCA[
MB_ ]; AnbR = DESCA[
NB_ ]; Ald = DESCA[
LLD_];
296 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &AiiD, &AiiR,
299 AnpD =
PB_Cnumroc( M, 0, Ainb1D, AnbD, myrow, AsrcD, nprow );
304 if( !(
PB_Cspan( N, JA, DESCA[
INB_], AnbR, AsrcR, npcol ) ) )
312 if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
316 *B =
Mptr( A, AiiD, AiiR, Ald,
TYPE->size );
323 Bld =
MAX( 1, AnpD );
324 if( ( ( mycol == AsrcR ) || ( AsrcR < 0 ) ) && ( AnpD > 0 ) )
335 TYPE->Fmmadd( &AnpD, &N,
TYPE->one,
Mptr( A, AiiD, AiiR, Ald,
336 size ), &Ald,
TYPE->zero, *B, &Bld );
343 PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt,
348 AnR = N; Bsrc_ =
CSRC_;
349 AmyprocR = mycol; AmyprocD = myrow; AnprocsR = npcol;
351 AnpR =
PB_Cnumroc( N, 0, Ainb1R, AnbR, mycol, AsrcR, npcol );
361 if( ( AnpD > 0 ) && ( AnpR > 0 ) )
366 AnpreR =
PB_Cnpreroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR,
377 TYPE->Cgesd2d( ctxt, AnpR, AnpD,
Mptr( A, AiiR, AiiD, Ald,
383 TYPE->Cgesd2d( ctxt, AnpD, AnpR,
Mptr( A, AiiD, AiiR,
384 Ald,
TYPE->size ), Ald, AmyprocD,
388 else if( AnpreR > 0 )
396 *B = Bptr =
PB_Cmalloc( ( AnpreR + AnpR ) * AnpD * size );
398 mydistnb =
MModSub( AmyprocR, AsrcR, AnprocsR ) * AnbR;
399 kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ?
400 ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 );
401 offset = kblks * AnbR;
402 kn = Ainb1R + mydistnb - AnbR;
403 kn =
MIN( kn, AnpreR ) +
404 (
MAX( 1, kblks ) - 1 ) * mydistnb;
408 Aptr =
Mptr( A, AiiR, AiiD, Ald, size );
413 TYPE->Cgerv2d( ctxt, AnpreR, AnpD, *B, Bld,
MModSub1( AmyprocR,
414 AnprocsR ), AmyprocD );
419 if( ( ( AnpR - 1 ) / AnbR ) == kblks )
422 add( &kb, &AnpD, one,
Mptr( Aptr, offset, 0, Ald, size ),
423 &Ald, zero,
Mptr( Bptr, nlen+offset, 0, Bld, size ),
427 for( k = kblks; k >= 1; k-- )
430 shft( &kb, &AnpD, &offset,
Mptr( Bptr, kn, 0, Bld, size ),
433 add( &AnbR, &AnpD, one,
Mptr( Aptr, offset, 0, Ald, size ),
434 &Ald, zero,
Mptr( Bptr, kn+offset, 0, Bld, size ),
440 if( AnpreR + AnpR != AnR )
447 TYPE->Cgesd2d( ctxt, AnpreR+AnpR, AnpD, *B, Bld,
448 MModAdd1( AmyprocR, AnprocsR ), AmyprocD );
455 Aptr =
Mptr( A, AiiD, AiiR, Ald, size );
456 Bld =
MAX( 1, AnpD );
460 TYPE->Cgerv2d( ctxt, AnpD, AnpreR, *B, Bld, AmyprocD,
466 if( ( ( AnpR - 1 ) / AnbR ) == kblks )
469 add( &AnpD, &kb, one,
Mptr( Aptr, 0, offset, Ald, size ),
470 &Ald, zero,
Mptr( Bptr, 0, nlen+offset, Bld, size ),
474 for( k = kblks; k >= 1; k-- )
477 shft( &AnpD, &kb, &offset,
Mptr( Bptr, 0, kn, Bld, size ),
480 add( &AnpD, &AnbR, one,
Mptr( Aptr, 0, offset, Ald, size ),
481 &Ald, zero,
Mptr( Bptr, 0, kn + offset, Bld, size ),
487 if( AnpreR + AnpR != AnR )
494 TYPE->Cgesd2d( ctxt, AnpD, AnpreR+AnpR, *B, Bld, AmyprocD,
508 ArocR =
PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR, AnprocsR );
510 if( ( AnpD > 0 ) && ( AnpR > 0 ) )
515 AnnxtR =
PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, AmyprocR, AsrcR,
517 AnnxtL =
PB_Cnnxtroc( AnR, 0, Ainb1R, AnbR, ArocR, AsrcR,
520 if( ( AnnxtR =
MModSub( AnnxtR, AnnxtL, AnR ) ) == 0 )
528 TYPE->Cgesd2d( ctxt, AnpR, AnpD,
Mptr( A, AiiR, AiiD, Ald,
534 TYPE->Cgesd2d( ctxt, AnpD, AnpR,
Mptr( A, AiiD, AiiR, Ald,
539 else if( AnnxtR > 0 )
547 *B = Bptr =
PB_Cmalloc( ( AnnxtR + AnpR ) * AnpD * size );
548 kblks = ( ( ( ktmp = AnR - Ainb1R - 1 ) >= 0 ) ?
549 ( ( ktmp / AnbR ) + 1 ) / AnprocsR : 0 );
550 mydist =
MModSub( ArocR, AmyprocR, AnprocsR );
551 mydistnb = mydist * AnbR;
552 srcdist =
MModSub( ArocR, AsrcR, AnprocsR );
557 Aptr =
Mptr( A, AiiR, AiiD, Ald, size );
562 TYPE->Cgerv2d( ctxt, AnnxtR, AnpD,
Mptr( *B, AnpR, 0, Bld,
563 size ), Bld,
MModAdd1( AmyprocR, AnprocsR ),
569 if( mydist > srcdist )
572 kb = Ainb1R + srcdist*AnbR;
574 else if( mydist == srcdist )
576 add( &Ainb1R, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
577 Aptr =
Mptr( Aptr, Ainb1R, 0, Ald, size );
578 Bptr =
Mptr( Bptr, Ainb1R, 0, Ald, size );
579 offset = Ainb1R - AnpR;
584 add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
585 Aptr =
Mptr( Aptr, AnbR, 0, Ald, size );
586 Bptr =
Mptr( Bptr, AnbR, 0, Ald, size );
587 offset = AnbR - AnpR;
591 for( k = kblks; k >= 1; k-- )
593 shft( &kb, &AnpD, &offset, Bptr, &Bld );
594 Bptr =
Mptr( Bptr, kb, 0, Bld, size );
595 add( &AnbR, &AnpD, one, Aptr, &Ald, zero, Bptr, &Bld );
596 Aptr =
Mptr( Aptr, AnbR, 0, Ald, size );
597 Bptr =
Mptr( Bptr, AnbR, 0, Ald, size );
602 if( AnnxtR + AnpR != AnR )
609 TYPE->Cgesd2d( ctxt, AnnxtR+AnpR, AnpD, *B, Bld,
610 MModSub1( AmyprocR, AnprocsR ), AmyprocD );
617 Aptr =
Mptr( A, AiiD, AiiR, Ald, size );
618 Bld =
MAX( 1, AnpD );
622 TYPE->Cgerv2d( ctxt, AnpD, AnnxtR,
Mptr( *B, 0, AnpR, Bld,
623 size ), Bld, AmyprocD,
MModAdd1( AmyprocR,
629 if( mydist > srcdist )
632 kb = Ainb1R + srcdist*AnbR;
634 else if( mydist == srcdist )
636 add( &AnpD, &Ainb1R, one, Aptr, &Ald, zero, Bptr, &Bld );
637 Aptr =
Mptr( Aptr, 0, Ainb1R, Ald, size );
638 Bptr =
Mptr( Bptr, 0, Ainb1R, Bld, size );
639 offset = Ainb1R - AnpR;
644 add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld );
645 Aptr =
Mptr( Aptr, 0, AnbR, Ald, size );
646 Bptr =
Mptr( Bptr, 0, AnbR, Bld, size );
647 offset = AnbR - AnpR;
651 for( k = kblks; k >= 1; k-- )
653 shft( &AnpD, &kb, &offset, Bptr, &Bld );
654 Bptr =
Mptr( Bptr, 0, kb, Bld, size );
655 add( &AnpD, &AnbR, one, Aptr, &Ald, zero, Bptr, &Bld );
656 Aptr =
Mptr( Aptr, 0, AnbR, Ald, size );
657 Bptr =
Mptr( Bptr, 0, AnbR, Bld, size );
662 if( AnnxtR + AnpR != AnR )
669 TYPE->Cgesd2d( ctxt, AnpD, AnnxtR+AnpR, *B, Bld, AmyprocD,
682 PB_Cdescset( DESCB, M, N, M, Ainb1D, AnbR, AnbD, AsrcR, AsrcD, ctxt, M );
686 PB_Cdescset( DESCB, M, N, Ainb1D, N, AnbD, AnbR, AsrcD, AsrcR, ctxt,
695 if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR )
702 DESCB[Bsrc_] =
MModSub1( AsrcR, AnprocsR );
710 DESCB[Bsrc_] =
PB_Cindxg2p( AnR-1, Ainb1R, AnbR, AsrcR, AsrcR,
713 if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) )
718 if( AnR + AnbR > Ainb1R + ( AnprocsR - 1 ) * AnbR )
725 DESCB[Bsrc_] =
MModAdd1( ArocR, AnprocsR );
733 DESCB[Bsrc_] = AsrcR;
735 if( ( AnpD > 0 ) && ( AnpR > 0 ) && ( AmyprocR == DESCB[Bsrc_] ) )