14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANSA,
char * TRANSB,
int M,
int N,
int K,
22 char * ALPHA,
char * A,
int IA,
int JA,
int * DESCA,
23 char * B,
int IB,
int JB,
int * DESCB,
char * BETA,
24 char * C,
int IC,
int JC,
int * DESCC )
27 A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC,
32 char * DIRECA, * DIRECC, * TRANSA, * TRANSB;
33 int IA, IB, IC, JA, JB, JC, K, M, N;
39 int * DESCA, * DESCB, * DESCC;
260 char Aroc, GemmTa, GemmTb, TrA, TrB, * one, * talpha, * tbeta,
262 int Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR, AinbD,
263 AinbR, Ainb1D, Ainb1R, AisR, Akk, Ald, AmyprocD, AmyprocR,
264 AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff, ArocD,
265 ArocR, AsrcR, Asrc_, Bcol, Bii, Bimb1, Binb1, Bjj, Bld, Bm,
266 Bmb, Bmp, Bn, Bnb, Bnq, Brow, Cbufld, Ccol, Ccurrow, Cfr,
267 Cfwd, Cii, Cimb, Cimb1, Cinb, Cinb1, CisR, Cjj, Ckk, Cld,
268 Cmb, Cmp, Cnb, Cnq, Coff, Crow, Csrc, WAfr, WCfr, WCsum,
269 ctxt, lcmb, m, maxp, maxpm1, maxq, mb, mbb, mycol, myrow,
270 ncpq, nota, notb, npcol, npq=0, nprow, nrpq, p=0, q=0, size,
280 char * Abuf = NULL, * Bptr = NULL, * Cbuf = NULL, * WA = NULL,
294 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
303 AinbR = DESCA[
IMB_ ]; AinbD = DESCA[
INB_];
304 AnbR = DESCA[
MB_ ]; AnbD = DESCA[
NB_ ];
305 AsrcR = DESCA[Asrc_]; Ald = DESCA[
LLD_];
306 AmyprocD = mycol; AnprocsD = npcol;
307 AmyprocR = myrow; AnprocsR = nprow;
308 PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD,
309 &AiiR, &AiiD, &ArocR, &ArocD );
315 AinbD = DESCA[
IMB_ ]; AinbR = DESCA[
INB_];
316 AnbD = DESCA[
MB_ ]; AnbR = DESCA[
NB_ ];
317 AsrcR = DESCA[Asrc_]; Ald = DESCA[
LLD_];
318 AmyprocD = myrow; AnprocsD = nprow;
319 AmyprocR = mycol; AnprocsR = npcol;
320 PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR,
321 &AiiD, &AiiR, &ArocD, &ArocR );
324 AnpD =
PB_Cnumroc( K, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD );
327 Cimb = DESCC[
IMB_ ]; Cinb = DESCC[
INB_];
328 Cmb = DESCC[
MB_ ]; Cnb = DESCC[
NB_ ];
330 PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
334 Cnq =
PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
344 talpha = ALPHA; GemmTa = ( nota ?
CTRAN : TrA ); GemmTb =
CNOTRAN;
365 PB_Cdescribe( Bm, Bn, IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
366 &Bld, &Bimb1, &Binb1, &Bmb, &Bnb, &Brow, &Bcol, Bd0 );
368 Bmp =
PB_Cnumroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
369 Bnq =
PB_Cnumroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
370 if( ( Bmp > 0 ) && ( Bnq > 0 ) ) Bptr =
Mptr( B, Bii, Bjj, Bld, size );
375 if( !( AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) ) ) && !Afwd )
377 tmp =
PB_Cindxg2p( M - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR );
378 q =
MModSub( tmp, ArocR, AnprocsR );
384 if( !( CisR = ( ( Crow < 0 ) || ( nprow == 1 ) ) ) && !Cfwd )
386 tmp =
PB_Cindxg2p( M - 1, Cimb1, Cmb, Crow, Crow, nprow );
387 p =
MModSub( tmp, Crow, nprow );
393 lcmb =
PB_Clcm( ( maxp = ( CisR ? 1 : nprow ) ) * Cmb,
394 ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR );
403 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, AnprocsR ) );
404 Akk =
PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, AnprocsR );
405 AnpR =
PB_Cnumroc( M, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR );
407 Ccurrow = ( CisR ? -1 :
MModAdd( Crow, p, nprow ) );
408 Ckk =
PB_Cg2lrem( IC, Cimb, Cmb, Ccurrow, Csrc, nprow );
409 Cmp =
PB_Cnumroc( M, 0, Cimb1, Cmb, Ccurrow, Crow, nprow );
411 PB_CVMinit( &VM, 0, Cmp, AnpR, Cimb1, Ainb1R, Cmb, AnbR, p, q,
423 if( npq ) mbb = npq / ( ( npq - 1 ) / mb + 1 );
427 mbb =
MIN( mbb, npq );
439 if( ( Afr = ( ncpq < mbb ) ) != 0 )
446 if( AisR || ( AmyprocR == AcurrocR ) )
450 AnpD, one,
Mptr( A, Akk, AiiD, Ald, size ), Ald,
451 zero, Abuf, Abufld );
460 if( AisR || ( AmyprocR == AcurrocR ) )
461 Abuf =
Mptr( A, Akk+Aoff, AiiD, Ald, size );
463 PB_Cdescset( DBUFA, mbb, K, mbb, Ainb1D, mbb, AnbD, AcurrocR,
464 ArocD, ctxt, Abufld );
472 if( ( Afr = ( ncpq < mbb ) ) != 0 )
478 Abufld =
MAX( 1, AnpD );
479 if( AisR || ( AmyprocR == AcurrocR ) )
483 AnpD, one,
Mptr( A, AiiD, Akk, Ald, size ), Ald,
484 zero, Abuf, Abufld );
493 if( AisR || ( AmyprocR == AcurrocR ) )
494 Abuf =
Mptr( A, AiiD, Akk+Aoff, Ald, size );
496 PB_Cdescset( DBUFA, K, mbb, Ainb1D, mbb, AnbD, mbb, ArocD,
497 AcurrocR, ctxt, Abufld );
505 PB_CInV(
TYPE,
NOCONJG,
COLUMN, Bm, Bn, Bd0, mbb, Abuf, 0, 0,
506 DBUFA, &Aroc, &WA, WAd, &WAfr );
510 PB_COutV(
TYPE,
ROW,
INIT, Bm, Bn, Bd0, mbb, &WC, WCd, &WCfr,
515 if( Bmp > 0 && Bnq > 0 )
517 talpha, WA, &WAd[
LLD_], Bptr, &Bld, zero, WC, &WCd[
LLD_] );
518 if( WAfr ) free( WA );
519 if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) )
520 if( Abuf ) free( Abuf );
526 WCd[
RSRC_] = Ccurrow;
528 gsum2d( ctxt,
COLUMN, &top, mbb, Bnq, WC, WCd[
LLD_],
535 if( ( Cfr = ( nrpq < mbb ) ) != 0 )
540 Cbufld = mbb; tbeta = zero;
541 if( CisR || ( myrow == Ccurrow ) )
549 Cbufld = Cld; tbeta = BETA;
550 if( CisR || ( myrow == Ccurrow ) )
551 Cbuf =
Mptr( C, Ckk+Coff, Cjj, Cld, size );
553 PB_Cdescset( DBUFC, mbb, N, mbb, Cinb1, mbb, Cnb, Ccurrow, Ccol,
558 PB_Cpaxpby(
TYPE,
NOCONJG, mbb, N, one, WC, 0, 0, WCd,
ROW, tbeta,
559 Cbuf, 0, 0, DBUFC,
ROW );
563 if( Cfr && ( CisR || ( myrow == Ccurrow ) ) )
566 BETA,
Mptr( C, Ckk, Cjj, Cld, size ), Cld, one, Cbuf,
568 if( Cbuf ) free( Cbuf );
570 if( WCfr ) free( WC );
577 PB_CInV(
TYPE,
NOCONJG,
ROW, Bm, Bn, Bd0, mbb, Abuf, 0, 0,
578 DBUFA, &Aroc, &WA, WAd, &WAfr );
582 PB_COutV(
TYPE,
COLUMN,
INIT, Bm, Bn, Bd0, mbb, &WC, WCd, &WCfr,
587 if( Bmp > 0 && Bnq > 0 )
589 talpha, Bptr, &Bld, WA, &WAd[
LLD_], zero, WC, &WCd[
LLD_] );
590 if( WAfr ) free( WA );
591 if( Afr && ( AisR || ( AmyprocR == AcurrocR ) ) )
592 if( Abuf ) free( Abuf );
600 gsum2d( ctxt,
ROW, &top, Bmp, mbb, WC, WCd[
LLD_], myrow,
607 if( ( Cfr = ( nrpq < mbb ) ) != 0 )
612 Cbufld = mbb; tbeta = zero;
613 if( CisR || ( myrow == Ccurrow ) )
621 Cbufld = Cld; tbeta = BETA;
622 if( CisR || ( myrow == Ccurrow ) )
623 Cbuf =
Mptr( C, Ckk+Coff, Cjj, Cld, size );
625 PB_Cdescset( DBUFC, mbb, N, mbb, Cinb1, mbb, Cnb, Ccurrow, Ccol,
631 one, WC, 0, 0, WCd,
COLUMN, tbeta, Cbuf, 0, 0, DBUFC,
636 if( Cfr && ( CisR || ( myrow == Ccurrow ) ) )
639 BETA,
Mptr( C, Ckk, Cjj, Cld, size ), Cld, one, Cbuf,
641 if( Cbuf ) free( Cbuf );
643 if( WCfr ) free( WC );
655 if( ( Cfwd && ( p == maxpm1 ) ) ||
656 ( !( Cfwd ) && ( p == 0 ) ) )
661 if( TrB ==
CCOTRAN ) free( talpha );