14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANSA,
char * TRANSB,
int M,
int N,
int K,
22 char * ALPHA,
char * A,
int IA,
int JA,
int * DESCA,
23 char * B,
int IB,
int JB,
int * DESCB,
char * BETA,
24 char * C,
int IC,
int JC,
int * DESCC )
27 A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC,
32 char * DIRECB, * DIRECC, * TRANSA, * TRANSB;
33 int IA, IB, IC, JA, JB, JC, K, M, N;
39 int * DESCA, * DESCB, * DESCC;
260 char Broc, GemmTa, GemmTb, TrA, TrB, * one, * talpha, * tbeta,
262 int Acol, Aii, Aimb1, Ainb1, Ajj, Ald, Am, Amb, Amp, An, Anb,
263 Anq, Arow, Bbufld, BcurrocR, Bfr, Bfwd, BiD, BiR, BiiD, BiiR,
264 BinbD, BinbR, Binb1D, Binb1R, BisR, Bkk, Bld, BmyprocD,
265 BmyprocR, BnbD, BnbR, BnpD, BnpR, BnprocsD, BnprocsR, Boff,
266 BrocD, BrocR, BsrcR, Bsrc_, Cbufld, Ccol, Ccurcol, Cfr, Cfwd,
267 Cii, Cimb, Cimb1, Cinb, Cinb1, CisR, Cjj, Ckk, Cld, Cmb, Cmp,
268 Cnb, Cnq, Coff, Crow, Csrc, WBfr, WCfr, WCsum, ctxt, lcmb,
269 maxp, maxpm1, maxq, mycol, myrow, n, nb, nbb, ncpq, nota,
270 notb, npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp;
279 char * Aptr = NULL, * Bbuf = NULL, * Cbuf = NULL, * WB = NULL,
293 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
302 BinbD = DESCB[
IMB_ ]; BinbR = DESCB[
INB_];
303 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
304 BsrcR = DESCB[Bsrc_]; Bld = DESCB[
LLD_];
305 BmyprocD = myrow; BnprocsD = nprow;
306 BmyprocR = mycol; BnprocsR = npcol;
307 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
308 &BiiD, &BiiR, &BrocD, &BrocR );
314 BinbR = DESCB[
IMB_ ]; BinbD = DESCB[
INB_];
315 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
316 BsrcR = DESCB[Bsrc_]; Bld = DESCB[
LLD_];
317 BmyprocD = mycol; BnprocsD = npcol;
318 BmyprocR = myrow; BnprocsR = nprow;
319 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
320 &BiiR, &BiiD, &BrocR, &BrocD );
323 BnpD =
PB_Cnumroc( K, 0, Binb1D, BnbD, BmyprocD, BrocD, BnprocsD );
326 Cimb = DESCC[
IMB_ ]; Cinb = DESCC[
INB_];
327 Cmb = DESCC[
MB_ ]; Cnb = DESCC[
NB_ ];
329 PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
332 Cmp =
PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow );
343 talpha = ALPHA; GemmTa =
CNOTRAN; GemmTb = ( notb ?
CTRAN : TrB );
364 PB_Cdescribe( Am, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
365 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
367 Amp =
PB_Cnumroc( Am, 0, Aimb1, Amb, myrow, Arow, nprow );
368 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
369 if( ( Amp > 0 ) && ( Anq > 0 ) ) { Aptr =
Mptr( A, Aii, Ajj, Ald, size ); }
374 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd )
376 tmp =
PB_Cindxg2p( N - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
377 q =
MModSub( tmp, BrocR, BnprocsR );
383 if( !( CisR = ( ( Ccol < 0 ) || ( npcol == 1 ) ) ) && !Cfwd )
385 tmp =
PB_Cindxg2p( N - 1, Cinb1, Cnb, Ccol, Ccol, npcol );
386 p =
MModSub( tmp, Ccol, npcol );
392 lcmb =
PB_Clcm( ( maxp = ( CisR ? 1 : npcol ) ) * Cnb,
393 ( maxq = ( BisR ? 1 : BnprocsR ) ) * BnbR );
402 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, q, BnprocsR ) );
403 Bkk =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BnprocsR );
404 BnpR =
PB_Cnumroc( N, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
406 Ccurcol = ( CisR ? -1 :
MModAdd( Ccol, p, npcol ) );
407 Ckk =
PB_Cg2lrem( JC, Cinb, Cnb, Ccurcol, Csrc, npcol );
408 Cnq =
PB_Cnumroc( N, 0, Cinb1, Cnb, Ccurcol, Ccol, npcol );
410 PB_CVMinit( &VM, 0, Cnq, BnpR, Cinb1, Binb1R, Cnb, BnbR, p, q,
422 if( npq ) nbb = npq / ( ( npq - 1 ) / nb + 1 );
426 nbb =
MIN( nbb, npq );
438 if( ( Bfr = ( ncpq < nbb ) ) != 0 )
444 Bbufld =
MAX( 1, BnpD );
445 if( BisR || ( BmyprocR == BcurrocR ) )
449 BnpD, one,
Mptr( B, BiiD, Bkk, Bld, size ), Bld,
450 zero, Bbuf, Bbufld );
459 if( BisR || ( BmyprocR == BcurrocR ) )
460 Bbuf =
Mptr( B, BiiD, Bkk+Boff, Bld, size );
462 PB_Cdescset( DBUFB, K, nbb, Binb1D, nbb, BnbD, nbb, BrocD,
463 BcurrocR, ctxt, Bbufld );
471 if( ( Bfr = ( ncpq < nbb ) ) != 0 )
478 if( BisR || ( BmyprocR == BcurrocR ) )
482 BnpD, one,
Mptr( B, Bkk, BiiD, Bld, size ), Bld,
483 zero, Bbuf, Bbufld );
492 if( BisR || ( BmyprocR == BcurrocR ) )
493 Bbuf =
Mptr( B, Bkk+Boff, BiiD, Bld, size );
495 PB_Cdescset( DBUFB, nbb, K, nbb, Binb1D, nbb, BnbD, BcurrocR,
496 BrocD, ctxt, Bbufld );
504 PB_CInV(
TYPE,
NOCONJG,
ROW, Am, An, Ad0, nbb, Bbuf, 0, 0,
505 DBUFB, &Broc, &WB, WBd, &WBfr );
509 PB_COutV(
TYPE,
COLUMN,
INIT, Am, An, Ad0, nbb, &WC, WCd, &WCfr,
514 if( Amp > 0 && Anq > 0 )
516 &Anq, talpha, Aptr, &Ald, WB, &WBd[
LLD_], zero,
518 if( WBfr ) free( WB );
519 if( Bfr && ( BisR || ( BmyprocR == BcurrocR ) ) )
520 if( Bbuf ) free( Bbuf );
526 WCd[
CSRC_] = Ccurcol;
528 gsum2d( ctxt,
ROW, &top, Amp, nbb, WC, WCd[
LLD_], myrow,
535 if( ( Cfr = ( nrpq < nbb ) ) != 0 )
540 Cbufld =
MAX( 1, Cmp ); tbeta = zero;
541 if( CisR || ( mycol == Ccurcol ) )
549 Cbufld = Cld; tbeta = BETA;
550 if( CisR || ( mycol == Ccurcol ) )
551 Cbuf =
Mptr( C, Cii, Ckk+Coff, Cld, size );
553 PB_Cdescset( DBUFC, M, nbb, Cimb1, nbb, Cmb, nbb, Crow, Ccurcol,
558 PB_Cpaxpby(
TYPE,
NOCONJG, M, nbb, one, WC, 0, 0, WCd,
COLUMN,
559 tbeta, Cbuf, 0, 0, DBUFC,
COLUMN );
563 if( Cfr && ( CisR || ( mycol == Ccurcol ) ) )
566 BETA,
Mptr( C, Cii, Ckk, Cld, size ), Cld, one, Cbuf,
568 if( Cbuf ) free( Cbuf );
570 if( WCfr ) free( WC );
577 PB_CInV(
TYPE,
NOCONJG,
COLUMN, Am, An, Ad0, nbb, Bbuf, 0, 0,
578 DBUFB, &Broc, &WB, WBd, &WBfr );
582 PB_COutV(
TYPE,
ROW,
INIT, Am, An, Ad0, nbb, &WC, WCd, &WCfr,
587 if( Amp > 0 && Anq > 0 )
589 &Amp, talpha, WB, &WBd[
LLD_], Aptr, &Ald, zero, WC,
591 if( WBfr ) free( WB );
592 if( Bfr && ( BisR || ( BmyprocR == BcurrocR ) ) )
593 if( Bbuf ) free( Bbuf );
601 gsum2d( ctxt,
COLUMN, &top, nbb, Anq, WC, WCd[
LLD_],
608 if( ( Cfr = ( nrpq < nbb ) ) != 0 )
613 Cbufld =
MAX( 1, Cmp ); tbeta = zero;
614 if( CisR || ( mycol == Ccurcol ) )
622 Cbufld = Cld; tbeta = BETA;
623 if( CisR || ( mycol == Ccurcol ) )
624 Cbuf =
Mptr( C, Cii, Ckk+Coff, Cld, size );
626 PB_Cdescset( DBUFC, M, nbb, Cimb1, nbb, Cmb, nbb, Crow, Ccurcol,
632 one, WC, 0, 0, WCd,
ROW, tbeta, Cbuf, 0, 0, DBUFC,
637 if( Cfr && ( CisR || ( mycol == Ccurcol ) ) )
640 BETA,
Mptr( C, Cii, Ckk, Cld, size ), Cld, one, Cbuf,
642 if( Cbuf ) free( Cbuf );
644 if( WCfr ) free( WC );
656 if( ( Cfwd && ( p == maxpm1 ) ) ||
657 ( !( Cfwd ) && ( p == 0 ) ) )
662 if( TrA ==
CCOTRAN ) free( talpha );