14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * X,
int IX,
int JX,
int * DESCX,
int INCX,
22 char * Y,
int IY,
int JY,
int * DESCY,
int INCY )
24 void PB_CpswapND(
TYPE, N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY )
28 int INCX, INCY, IX, IY, JX, JY, N;
186 char scope, * top, * zero;
187 int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, Xm,
188 XmyprocD, XmyprocR, Xn, XnprocsD, XnprocsR, XprocR, Xroc,
189 Xrow, Ycol, Yii, Yinb1D, YisR, YisRow, Yjj, Yld, Ylinc,
190 YmyprocD, YmyprocR, YnbD, YnpD, YnprocsD, YnprocsR, YprocD,
191 YprocR, Yroc, Yrow, ctxt, ione=1, k, kbb, kk, kn, ktmp, mycol,
192 mydist, myproc, myrow, npcol, nprow, p, size;
208 PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
210 if( ( XisRow = ( INCX == DESCX[
M_] ) ) != 0 )
212 Xld = DESCX[
LLD_]; Xlinc = Xld;
213 XmyprocD = mycol; XnprocsD = npcol;
214 XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
215 XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
219 Xld = DESCX[
LLD_]; Xlinc = 1;
220 XmyprocD = myrow; XnprocsD = nprow;
221 XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
222 XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
227 PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
229 if( ( YisRow = ( INCY == DESCY[
M_] ) ) != 0 )
231 YnbD = DESCY[
NB_]; Yld = DESCY[
LLD_]; Ylinc = Yld;
232 YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
233 YprocD = Ycol; YmyprocD = mycol; YnprocsD = npcol;
238 YnbD = DESCY[
MB_]; Yld = DESCY[
LLD_]; Ylinc = 1;
239 YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
240 YprocD = Yrow; YmyprocD = myrow; YnprocsD = nprow;
244 YisR = ( ( YprocR == -1 ) || ( YnprocsR == 1 ) );
248 RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
252 size =
TYPE->size; zero =
TYPE->zero;
261 if( YisR ) { YprocR = ( ( RRorCC ) ? XprocR : 0 ); }
266 if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) )
273 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD );
277 if( XprocR == YprocR )
287 if( XisRow ) { kk = Yjj; ktmp = JX + N; kn = JX + Yinb1D; }
288 else { kk = Yii; ktmp = IX + N; kn = IX + Yinb1D; }
290 if( YmyprocD == Yroc )
292 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld, size ),
293 &Xlinc,
Mptr( Y, Yii, Yjj, Yld, size ),
299 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld, size ),
304 for( k = kn; k < ktmp; k += YnbD )
306 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
307 if( YmyprocD == Yroc )
310 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ),
311 &Xlinc,
Mptr( Y, Yii, kk, Yld, size ),
314 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ),
315 &Xlinc,
Mptr( Y, kk, Yjj, Yld, size ),
322 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k, Xld, size ),
325 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj, Xld, size ),
337 TYPE->Fset( &N, zero,
Mptr( X, Xii, Xjj, Xld, size ),
346 TYPE->Cgsum2d( ctxt,
ROW, top, 1, N,
Mptr( X, Xii, Xjj, Xld,
347 size ), Xld, -1, 0 );
353 Xld, size ), Xld, -1, 0 );
361 if( YmyprocR == YprocR )
371 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
372 size ), Yld, XprocR, YmyprocD );
373 TYPE->Cgerv2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
374 size ), Yld, XprocR, YmyprocD );
378 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
379 size ), Yld, YmyprocD, XprocR );
380 TYPE->Cgerv2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
381 size ), Yld, YmyprocD, XprocR );
386 if( XmyprocR == XprocR )
398 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR,
401 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD,
406 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
407 else { ktmp = IX + N; kn = IX + Yinb1D; }
409 if( YmyprocD == Yroc )
411 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld, size ),
412 &Xlinc, buf, &ione );
417 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld,
422 for( k = kn; k < ktmp; k += YnbD )
424 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
426 if( YmyprocD == Yroc )
429 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ),
430 &Xlinc, buf+kk*size, &ione );
432 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ),
433 &Xlinc, buf+kk*size, &ione );
439 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k, Xld,
442 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj, Xld,
448 TYPE->Cgesd2d( ctxt, 1, YnpD, buf, 1, YprocR,
451 TYPE->Cgesd2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD,
453 if( buf ) free( buf );
457 TYPE->Fset( &N, zero,
Mptr( X, Xii, Xjj, Xld, size ),
466 TYPE->Cgsum2d( ctxt,
ROW, top, 1, N,
Mptr( X, Xii, Xjj,
467 Xld, size ), Xld, -1, 0 );
473 Xld, size ), Xld, -1, 0 );
484 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
485 else { ktmp = IX + N; kn = IX + Yinb1D; }
491 for( p = 0; p < YnprocsD; p++ )
493 mydist =
MModSub( p, YprocD, YnprocsD );
494 myproc =
MModAdd( YprocD, mydist, YnprocsD );
496 if( ( XprocR == p ) && ( YprocR == Xroc ) )
503 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
508 kk = ( XisRow ? Yii : Yjj );
512 if( XmyprocD == Xroc )
514 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld,
515 size ), &Xlinc,
Mptr( Y, Yii, Yjj,
516 Yld, size ), &Ylinc );
521 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld,
527 for( k = kn; k < ktmp; k += YnbD )
529 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
532 if( XmyprocD == Xroc )
535 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld,
536 size ), &Xlinc,
Mptr( Y, kk,
537 Yjj, Yld, size ), &Ylinc );
539 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld,
540 size ), &Xlinc,
Mptr( Y, Yii,
541 kk, Yld, size ), &Ylinc );
547 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k,
548 Xld, size ), &Xlinc );
550 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj,
551 Xld, size ), &Xlinc );
564 if( ( YmyprocR == YprocR ) && ( YmyprocD == p ) )
566 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
572 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj,
573 Yld, size ), Yld, XprocR, Xroc );
574 TYPE->Cgerv2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj,
575 Yld, size ), Yld, XprocR, Xroc );
579 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj,
580 Yld, size ), Yld, Xroc, XprocR );
581 TYPE->Cgerv2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj,
582 Yld, size ), Yld, Xroc, XprocR );
587 if( XmyprocR == XprocR )
589 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
598 if( XmyprocD == Xroc )
602 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD,
605 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1,
611 if( XmyprocD == Xroc )
613 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld,
614 size ), &Xlinc, buf, &ione );
619 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj,
620 Xld, size ), &Xlinc );
625 for( k = kn; k < ktmp; k += YnbD )
627 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
630 if( XmyprocD == Xroc )
633 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld,
634 size ), &Xlinc, buf+kk*size,
637 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld,
638 size ), &Xlinc, buf+kk*size,
645 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k,
646 Xld, size ), &Xlinc );
648 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj,
649 Xld, size ), &Xlinc );
655 if( XmyprocD == Xroc )
658 TYPE->Cgesd2d( ctxt, YnpD, 1, buf, YnpD,
661 TYPE->Cgesd2d( ctxt, 1, YnpD, buf, 1,
663 if( buf ) free( buf );
673 if( XmyprocR == XprocR )
678 TYPE->Cgsum2d( ctxt,
ROW, top, 1, N,
Mptr( X, Xii, Xjj,
679 Xld, size ), Xld, -1, 0 );
685 Xld, size ), Xld, -1, 0 );
696 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD );
702 if( YmyprocR == YprocR )
707 Yld, size ), Yld, YprocR, YmyprocD );
712 if( YmyprocR == YprocR )
713 TYPE->Cgebs2d( ctxt,
ROW, top, YnpD, 1,
Mptr( Y, Yii, Yjj,
716 TYPE->Cgebr2d( ctxt,
ROW, top, YnpD, 1,
Mptr( Y, Yii, Yjj,
717 Yld, size ), Yld, YmyprocD, YprocR );
728 if( YisR || ( YmyprocR == YprocR ) )
730 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD, YnprocsD );
735 kk = ( YisRow ? Yjj : Yii );
737 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
738 else { ktmp = IX + N; kn = IX + Yinb1D; }
740 if( YmyprocD == Yroc )
742 TYPE->Fswap( &Yinb1D,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
743 Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc );
748 TYPE->Fset( &Yinb1D, zero,
Mptr( X, Xii, Xjj, Xld, size ),
753 for( k = kn; k < ktmp; k += YnbD )
755 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
756 if( YmyprocD == Yroc )
761 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ), &Xlinc,
762 Mptr( Y, Yii, kk, Yld, size ), &Ylinc );
764 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ), &Xlinc,
765 Mptr( Y, Yii, kk, Yld, size ), &Ylinc );
770 TYPE->Fswap( &kbb,
Mptr( X, Xii, k, Xld, size ), &Xlinc,
771 Mptr( Y, kk, Yjj, Yld, size ), &Ylinc );
773 TYPE->Fswap( &kbb,
Mptr( X, k, Xjj, Xld, size ), &Xlinc,
774 Mptr( Y, kk, Yjj, Yld, size ), &Ylinc );
781 TYPE->Fset( &kbb, zero,
Mptr( X, Xii, k, Xld, size ),
784 TYPE->Fset( &kbb, zero,
Mptr( X, k, Xjj, Xld, size ),
795 TYPE->Fset( &N, zero,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc );
804 TYPE->Cgsum2d( ctxt, &scope, top, 1, N,
Mptr( X, Xii, Xjj, Xld,
805 size ), Xld, -1, 0 );
807 TYPE->Cgsum2d( ctxt, &scope, top, N, 1,
Mptr( X, Xii, Xjj, Xld,
808 size ), Xld, -1, 0 );
818 if( XisRow ) { Xm = 1; Xn = N; }
819 else { Xm = N; Xn = 1; }
824 if( YmyprocR == YprocR )
829 size ), Xld, YprocR, YmyprocD );
834 if( YmyprocR == YprocR )
835 TYPE->Cgebs2d( ctxt,
ROW, top, Xm, Xn,
Mptr( X, Xii, Xjj, Xld,
838 TYPE->Cgebr2d( ctxt,
ROW, top, Xm, Xn,
Mptr( X, Xii, Xjj, Xld,
839 size ), Xld, YmyprocD, YprocR );