14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * X,
int IX,
int JX,
int * DESCX,
int INCX,
22 char * Y,
int IY,
int JY,
int * DESCY,
int INCY,
25 void PB_CpdotND(
TYPE, N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY,
30 int INCX, INCY, IX, IY, JX, JY, N;
223 int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD,
224 XmyprocR, XnprocsD, XnprocsR, XprocR, Xroc, Xrow, Ycol, Yii,
225 Yinb1D, YisR, YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR,
226 YnbD, YnpD, YnprocsD, YnprocsR, YprocD, YprocR, Yroc, Yrow,
227 ctxt, ione=1, k, kbb, kk, kn, ktmp, mycol, mydist, myproc,
228 myrow, npcol, nprow, p, size;
232 char * Xptr = NULL, * Yptr = NULL, * buf = NULL;
244 PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
246 if( ( XisRow = ( INCX == DESCX[
M_] ) ) != 0 )
248 Xld = DESCX[
LLD_]; Xlinc = Xld;
249 XmyprocD = mycol; XnprocsD = npcol;
250 XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
251 XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
255 Xld = DESCX[
LLD_]; Xlinc = 1;
256 XmyprocD = myrow; XnprocsD = nprow;
257 XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
258 XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
263 PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
265 if( ( YisRow = ( INCY == DESCY[
M_] ) ) != 0 )
267 YnbD = DESCY[
NB_]; Yld = DESCY[
LLD_]; Ylinc = Yld;
268 YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
269 YprocD = Ycol; YmyprocD = mycol; YnprocsD = npcol;
274 YnbD = DESCY[
MB_]; Yld = DESCY[
LLD_]; Ylinc = 1;
275 YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
276 YprocD = Yrow; YmyprocD = myrow; YnprocsD = nprow;
279 YisR = ( ( YprocR == -1 ) || ( YnprocsR == 1 ) );
283 RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
294 if( YisR ) { YprocR = ( ( RRorCC ) ? XprocR : 0 ); }
304 if( XprocR == YprocR )
309 if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) )
312 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD,
322 if( XisRow ) { kk = Yjj; ktmp = JX + N; kn = JX + Yinb1D; }
323 else { kk = Yii; ktmp = IX + N; kn = IX + Yinb1D; }
325 if( YmyprocD == Yroc )
327 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
328 Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc );
333 for( k = kn; k < ktmp; k += YnbD )
335 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
336 if( YmyprocD == Yroc )
339 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
340 &Xlinc,
Mptr( Y, Yii, kk, Yld, size ),
343 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
344 &Xlinc,
Mptr( Y, kk, Yjj, Yld, size ),
357 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
362 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
371 if( YmyprocR == YprocR )
374 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD,
383 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
384 size ), Yld, XprocR, YmyprocD );
385 TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XprocR, XmyprocD );
390 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
391 size ), Yld, YmyprocD, XprocR );
392 TYPE->Cgerv2d( ctxt, 1, 1, DOT, 1, XmyprocD, XprocR );
396 if( XmyprocR == XprocR )
399 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, YmyprocD, YprocD,
411 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR,
414 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, XmyprocD,
419 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
420 else { ktmp = IX + N; kn = IX + Yinb1D; }
422 if( YmyprocD == Yroc )
424 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ),
425 &Xlinc, buf, &ione );
430 for( k = kn; k < ktmp; k += YnbD )
432 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
433 if( YmyprocD == Yroc )
436 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
437 &Xlinc, buf+kk*size, &ione );
439 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
440 &Xlinc, buf+kk*size, &ione );
445 if( buf ) free( buf );
455 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
457 TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YprocR, YmyprocD );
462 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
464 TYPE->Cgesd2d( ctxt, 1, 1, DOT, 1, YmyprocD, YprocR );
477 if( XmyprocR == XprocR )
478 TYPE->Cgebs2d( ctxt,
COLUMN, top, 1, 1, DOT, 1 );
480 TYPE->Cgebr2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, XprocR,
486 if( XmyprocR == XprocR )
487 TYPE->Cgebs2d( ctxt,
ROW, top, 1, 1, DOT, 1 );
489 TYPE->Cgebr2d( ctxt,
ROW, top, 1, 1, DOT, 1, XmyprocD,
499 if( ( XmyprocR == XprocR ) || ( YmyprocR == YprocR ) )
503 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
504 else { ktmp = IX + N; kn = IX + Yinb1D; }
510 for( p = 0; p < YnprocsD; p++ )
512 mydist =
MModSub( p, YprocD, YnprocsD );
513 myproc =
MModAdd( YprocD, mydist, YnprocsD );
515 if( ( XprocR == p ) && ( YprocR == Xroc ) )
521 if( ( XmyprocR == p ) && ( XmyprocD == Xroc ) )
523 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
528 kk = ( XisRow ? Yii : Yjj );
532 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ),
533 &Xlinc,
Mptr( Y, Yii, Yjj, Yld, size ),
539 for( k = kn; k < ktmp; k += YnbD )
541 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
545 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
546 &Xlinc,
Mptr( Y, kk, Yjj, Yld, size ),
549 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
550 &Xlinc,
Mptr( Y, Yii, kk, Yld, size ),
564 if( ( YmyprocR == YprocR ) && ( YmyprocD == p ) )
566 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
571 TYPE->Cgesd2d( ctxt, YnpD, 1,
Mptr( Y, Yii, Yjj, Yld,
572 size ), Yld, XprocR, Xroc );
574 TYPE->Cgesd2d( ctxt, 1, YnpD,
Mptr( Y, Yii, Yjj, Yld,
575 size ), Yld, Xroc, XprocR );
579 if( ( XmyprocR == XprocR ) && ( XmyprocD == Xroc ) )
581 YnpD =
PB_Cnumroc( N, 0, Yinb1D, YnbD, p, YprocD,
592 TYPE->Cgerv2d( ctxt, YnpD, 1, buf, YnpD, p, YprocR );
594 TYPE->Cgerv2d( ctxt, 1, YnpD, buf, 1, YprocR, p );
598 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ),
599 &Xlinc, buf, &ione );
604 for( k = kn; k < ktmp; k += YnbD )
606 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
610 FDOT( &kbb, DOT,
Mptr( X, Xii, k, Xld, size ),
611 &Xlinc, buf+kk*size, &ione );
613 FDOT( &kbb, DOT,
Mptr( X, k, Xjj, Xld, size ),
614 &Xlinc, buf+kk*size, &ione );
619 if( buf ) free( buf );
628 if( XmyprocR == XprocR )
633 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
638 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
645 if( YisR || ( YmyprocR == YprocR ) )
650 if( XmyprocR == XprocR )
651 TYPE->Cgebs2d( ctxt,
ROW, top, 1, 1, DOT, 1 );
653 TYPE->Cgebr2d( ctxt,
ROW, top, 1, 1, DOT, 1, YmyprocR,
659 if( XmyprocR == XprocR )
660 TYPE->Cgebs2d( ctxt,
COLUMN, top, 1, 1, DOT, 1 );
662 TYPE->Cgebr2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, XprocR,
675 if( YisR || ( YmyprocR == YprocR ) )
679 kk = ( YisRow ? Yjj : Yii );
681 if( XisRow ) { ktmp = JX + N; kn = JX + Yinb1D; }
682 else { ktmp = IX + N; kn = IX + Yinb1D; }
684 if( YmyprocD == Yroc )
686 FDOT( &Yinb1D, DOT,
Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
Mptr( Y,
687 Yii, Yjj, Yld, size ), &Ylinc );
692 for( k = kn; k < ktmp; k += YnbD )
694 kbb = ktmp - k; kbb =
MIN( kbb, YnbD );
695 if( YmyprocD == Yroc )
697 if( XisRow ) { Xptr =
Mptr( X, Xii, k, Xld, size ); }
698 else { Xptr =
Mptr( X, k, Xjj, Xld, size ); }
699 if( YisRow ) { Yptr =
Mptr( Y, Yii, kk, Yld, size ); }
700 else { Yptr =
Mptr( Y, kk, Yjj, Yld, size ); }
701 FDOT( &kbb, DOT, Xptr, &Xlinc, Yptr, &Ylinc );
716 TYPE->Cgsum2d( ctxt,
ROW, top, 1, 1, DOT, 1, -1, 0 );
721 TYPE->Cgsum2d( ctxt,
COLUMN, top, 1, 1, DOT, 1, -1, 0 );
730 TYPE->Cgsum2d( ctxt,
ALL, top, 1, 1, DOT, 1, -1, 0 );