14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANSA,
char * TRANSB,
int M,
int N,
int K,
22 char * ALPHA,
char * A,
int IA,
int JA,
int * DESCA,
23 char * B,
int IB,
int JB,
int * DESCB,
char * BETA,
24 char * C,
int IC,
int JC,
int * DESCC )
27 A, IA, JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC,
32 char * DIRECA, * DIRECB, * TRANSA, * TRANSB;
33 int IA, IB, IC, JA, JB, JC, K, M, N;
39 int * DESCA, * DESCB, * DESCC;
261 char Aroc, Broc, TrA, TrB, * one, * tbeta, * zero;
262 int ABrocs, Abufld, AcurrocR, Afr, Afwd, AiD, AiR, AiiD, AiiR,
263 AinbD, AinbR, Ainb1D, Ainb1R, AisR, AkkR, Ald, AmyprocD,
264 AmyprocR, AnbD, AnbR, AnpD, AnpR, AnprocsD, AnprocsR, Aoff,
265 ArocD, ArocR, AsrcR, Bbufld, BcurrocR, Bfr, Bfwd, BiD, BiR,
266 BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR, BkkR, Bld,
267 BmyprocD, BmyprocR, BnbD, BnbR, BnpD, BnpR, BnprocsD,
268 BnprocsR, Boff, BrocD, BrocR, BsrcR, Ccol, Cii, Cimb1, Cinb1,
269 Cjj, Cld, Cmb, Cmp, Cnb, Cnq, Crow, WAfr, WAsum, WBfr, WBsum,
270 Wkbb=0, ctxt, k, kb, kbb, lcmb, maxp, maxpm1, maxq, mycol,
271 myrow, ncpq, nota, notb, npcol, npq=0, nprow, nrpq, p=0, q=0,
280 char * Abuf = NULL, * Bbuf = NULL, * Cptr = NULL, * WA = NULL,
302 AiR = JA; Aroc =
CCOLUMN; AnprocsR = npcol;
303 AinbR = DESCA[
INB_]; AnbR = DESCA[
NB_ ]; AsrcR = DESCA[
CSRC_];
307 AiR = IA; Aroc =
CROW; AnprocsR = nprow;
308 AinbR = DESCA[
IMB_]; AnbR = DESCA[
MB_ ]; AsrcR = DESCA[
RSRC_];
313 BiR = IB; Broc =
CROW; BnprocsR = nprow;
314 BinbR = DESCB[
IMB_]; BnbR = DESCB[
MB_ ]; BsrcR = DESCB[
RSRC_];
318 BiR = JB; Broc =
CCOLUMN; BnprocsR = npcol;
319 BinbR = DESCB[
INB_]; BnbR = DESCB[
NB_ ]; BsrcR = DESCB[
CSRC_];
324 PB_Cdescribe( M, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
325 &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 );
327 Cmp =
PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow );
328 Cnq =
PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
333 if( !(
PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, AnprocsR ) ) &&
334 !(
PB_Cspan( K, BiR, BinbR, BnbR, BsrcR, BnprocsR ) ) )
336 PB_CInV(
TYPE, &TrA,
COLUMN, M, N, Cd0, K, A, IA, JA, DESCA, &Aroc, &WA,
338 PB_CInV(
TYPE, &TrB,
ROW, M, N, Cd0, K, B, IB, JB, DESCB, &Broc, &WB,
340 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
347 Cii, Cjj, Cld, size ), &Cld );
349 if( WAfr ) free( WA );
350 if( WBfr ) free( WB );
359 one =
TYPE->one; zero =
TYPE->zero; tbeta = BETA; gemm =
TYPE->Fgemm;
366 AiD = IA; AinbD = DESCA[
IMB_]; AnbD = DESCA[
MB_];
367 Ald = DESCA[
LLD_]; AmyprocD = myrow; AmyprocR = mycol;
369 PB_Cinfog2l( IA, JA, DESCA, AnprocsD, AnprocsR, AmyprocD, AmyprocR,
370 &AiiD, &AiiR, &ArocD, &ArocR );
374 AiD = JA; AinbD = DESCA[
INB_]; AnbD = DESCA[
NB_];
375 Ald = DESCA[
LLD_]; AmyprocD = mycol; AmyprocR = myrow;
377 PB_Cinfog2l( IA, JA, DESCA, AnprocsR, AnprocsD, AmyprocR, AmyprocD,
378 &AiiR, &AiiD, &ArocR, &ArocD );
381 AnpD =
PB_Cnumroc( M, 0, Ainb1D, AnbD, AmyprocD, ArocD, AnprocsD );
383 AisR = ( ( AsrcR < 0 ) || ( AnprocsR == 1 ) );
387 BiD = JB; BinbD = DESCB[
INB_]; BnbD = DESCB[
NB_];
388 Bld = DESCB[
LLD_]; BmyprocD = mycol; BmyprocR = myrow;
390 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
391 &BiiR, &BiiD, &BrocR, &BrocD );
395 BiD = IB; BinbD = DESCB[
IMB_]; BnbD = DESCB[
MB_];
396 Bld = DESCB[
LLD_]; BmyprocD = myrow; BmyprocR = mycol;
398 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
399 &BiiD, &BiiR, &BrocD, &BrocR );
402 BnpD =
PB_Cnumroc( N, 0, Binb1D, BnbD, BmyprocD, BrocD, BnprocsD );
404 BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) );
409 if( !( AisR ) && !( Afwd ) )
411 tmp =
PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, AnprocsR );
412 q =
MModSub( tmp, ArocR, AnprocsR );
418 if( !( BisR ) && !( Bfwd ) )
420 tmp =
PB_Cindxg2p( K - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
421 p =
MModSub( tmp, BrocR, BnprocsR );
424 if( Cmp > 0 && Cnq > 0 ) Cptr =
Mptr( C, Cii, Cjj, Cld, size );
428 PB_COutV(
TYPE,
COLUMN,
NOINIT, M, N, Cd0, kb, &WA, WAd0, &WAfr, &WAsum );
429 PB_COutV(
TYPE,
ROW,
NOINIT, M, N, Cd0, kb, &WB, WBd0, &WBfr, &WBsum );
433 lcmb =
PB_Clcm( ( maxp = ( BisR ? 1 : BnprocsR ) ) * BnbR,
434 ( maxq = ( AisR ? 1 : AnprocsR ) ) * AnbR );
439 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, AnprocsR ) );
440 AkkR =
PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, AnprocsR );
441 AnpR =
PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, AnprocsR );
443 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, BnprocsR ) );
444 BkkR =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BnprocsR );
445 BnpR =
PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
449 PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR, p, q,
453 for( k = 0; k < K; k += kb )
455 kbb = K - k; kbb =
MIN( kbb, kb );
465 if( ( Bfwd && ( p == maxpm1 ) ) ||
466 ( !( Bfwd ) && ( p == 0 ) ) )
470 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, AnprocsR ) );
471 AkkR =
PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR,
473 AnpR =
PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR,
476 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, BnprocsR ) );
477 BkkR =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR,
479 BnpR =
PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR,
482 PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR,
483 p, q, maxp, maxq, lcmb );
490 if( Wkbb == 0 ) { ABrocs = ( npq < kbb ? npq : kbb ); }
491 else { ABrocs = kbb - Wkbb; ABrocs =
MIN( ABrocs, npq ); }
503 if( ( Afr = ( ncpq < ABrocs ) ) != 0 )
509 Abufld =
MAX( 1, AnpD );
510 if( AisR || ( AmyprocR == AcurrocR ) )
514 ABrocs, AnpD, one,
Mptr( A, AiiD, AkkR, Ald,
515 size ), Ald, zero, Abuf, Abufld );
524 if( AisR || ( AmyprocR == AcurrocR ) )
525 Abuf =
Mptr( A, AiiD, AkkR + Aoff, Ald, size );
527 PB_Cdescset( DBUFA, M, ABrocs, Ainb1D, ABrocs, AnbD, ABrocs,
528 ArocD, AcurrocR, ctxt, Abufld );
536 if( ( Afr = ( ncpq < ABrocs ) ) != 0 )
543 if( AisR || ( AmyprocR == AcurrocR ) )
547 ABrocs, AnpD, one,
Mptr( A, AkkR, AiiD, Ald,
548 size ), Ald, zero, Abuf, Abufld );
557 if( AisR || ( AmyprocR == AcurrocR ) )
558 Abuf =
Mptr( A, AkkR + Aoff, AiiD, Ald, size );
560 PB_Cdescset( DBUFA, ABrocs, M, ABrocs, Ainb1D, ABrocs, AnbD,
561 AcurrocR, ArocD, ctxt, Abufld );
570 if( ( Bfr = ( nrpq < ABrocs ) ) != 0 )
577 if( BisR || ( BmyprocR == BcurrocR ) )
581 ABrocs, BnpD, one,
Mptr( B, BkkR, BiiD, Bld,
582 size ), Bld, zero, Bbuf, Bbufld );
591 if( BisR || ( BmyprocR == BcurrocR ) )
592 Bbuf =
Mptr( B, BkkR + Boff, BiiD, Bld, size );
594 PB_Cdescset( DBUFB, ABrocs, N, ABrocs, Binb1D, ABrocs, BnbD,
595 BcurrocR, BrocD, ctxt, Bbufld );
603 if( ( Bfr = ( nrpq < ABrocs ) ) != 0 )
609 Bbufld =
MAX( 1, BnpD );
610 if( BisR || ( BmyprocR == BcurrocR ) )
614 ABrocs, BnpD, one,
Mptr( B, BiiD, BkkR, Bld,
615 size ), Bld, zero, Bbuf, Bbufld );
624 if( BisR || ( BmyprocR == BcurrocR ) )
625 Bbuf =
Mptr( B, BiiD, BkkR + Boff, Bld, size );
627 PB_Cdescset( DBUFB, N, ABrocs, Binb1D, ABrocs, BnbD, ABrocs,
628 BrocD, BcurrocR, ctxt, Bbufld );
638 PB_CInV2(
TYPE, &TrA,
COLUMN, M, N, Cd0, ABrocs, Abuf, 0, 0,
639 DBUFA, &Aroc, WA, Wkbb, WAd0 );
640 PB_CInV2(
TYPE, &TrB,
ROW, M, N, Cd0, ABrocs, Bbuf, 0, 0,
641 DBUFB, &Broc, WB, Wkbb, WBd0 );
643 if( Afr & ( AisR || ( AmyprocR == AcurrocR ) ) )
644 if( Abuf ) free( Abuf );
645 if( Bfr & ( BisR || ( BmyprocR == BcurrocR ) ) )
646 if( Bbuf ) free( Bbuf );
658 if( Cmp > 0 && Cnq > 0 )
661 ALPHA, WA, &WAd0[
LLD_], WB, &WBd0[
LLD_], tbeta, Cptr, &Cld );
668 if( WAfr ) free( WA );
669 if( WBfr ) free( WB );