14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * UPLO,
char * TRANSA,
char * DIAG,
int M,
int N,
22 char * ALPHA,
char * A,
int IA,
int JA,
int * DESCA,
23 char * B,
int IB,
int JB,
int * DESCB )
25 void PB_CptrmmB(
TYPE, DIRECB, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A,
26 IA, JA, DESCA, B, IB, JB, DESCB )
30 char * DIAG, * DIRECB, * SIDE, * TRANSA, * UPLO;
31 int IA, IB, JA, JB, M, N;
257 char Broc, GemmTa, GemmTb, TranOp, WBroc, WCroc, conjg, * one,
258 * talpha, * tbeta, top, * zero;
259 int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq,
260 Alq0, Amb, Amp, An, Anb, Anq, Arow, BcurrocR, Bfwd, BiiD,
261 BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR, BnD,
262 BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR,
263 BsrcR, LNorRT, WBfr, WBld, WCfr, WCld, WCpbY, WCsum, ctxt,
264 l, lb, lside, ltmp, mycol, myrow, n, nb, nbb, notran, npcol,
265 nprow, p=0, size, tmp, upper;
272 char * Aptr = NULL, * Bptr = NULL, * WB = NULL, * WC = NULL;
286 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
289 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
296 BnD = An = M; BnR = N; Broc =
CCOLUMN;
297 BmyprocD = myrow; BnprocsD = nprow;
298 BmyprocR = mycol; BnprocsR = npcol;
299 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
301 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
302 &BiiD, &BiiR, &BrocD, &BrocR );
308 BnD = An = N; BnR = M; Broc =
CROW;
309 BmyprocD = mycol; BnprocsD = npcol;
310 BmyprocR = myrow; BnprocsR = nprow;
311 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
313 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
314 &BiiR, &BiiD, &BrocR, &BrocD );
321 PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
322 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
324 Amp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
325 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
326 if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr =
Mptr( A, Aii, Ajj, Ald, size );
335 else { conjg =
CNOCONJG; talpha = ALPHA; }
357 Alcmb = 2 * nb *
PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
358 ( Acol >= 0 ? npcol : 1 ) );
363 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !Bfwd )
365 tmp =
PB_Cindxg2p( BnR-1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
366 p =
MModSub( tmp, BrocR, BnprocsR );
380 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, BnprocsR ) );
381 BnpR =
PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
388 if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 );
392 nbb =
MIN( nbb, BnpR );
398 PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD,
399 BcurrocR, ctxt, Bld );
400 if( BisR || ( BmyprocR == BcurrocR ) )
401 Bptr =
Mptr( B, BiiD, BiiR, Bld, size );
405 PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR,
407 if( BisR || ( BmyprocR == BcurrocR ) )
408 Bptr =
Mptr( B, BiiR, BiiD, Bld, size );
413 PB_CInV(
TYPE,
NOCONJG, &WBroc, An, An, Ad0, nbb, Bptr, 0, 0, DBUFB,
414 &Broc, &WB, WBd, &WBfr );
419 PB_CInOutV(
TYPE, &WCroc, An, An, Ad0, nbb, one, Bptr, 0, 0, DBUFB,
420 &Broc, &tbeta, &WC, WCd, &WCfr, &WCsum, &WCpbY );
430 Aimb1 = Ad0[
IMB_ ]; Ainb1 = Ad0[
INB_ ]; Amb = Ad0[
MB_]; Anb = Ad0[
NB_];
432 Amp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
433 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
437 if( ( Amp > 0 ) && ( Anq > 0 ) )
448 for( l = 0; l < An; l += Alcmb )
450 lb = An - l; lb =
MIN( lb, Alcmb );
451 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
452 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
455 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
458 &nbb, &Alq0, talpha,
Mptr( Aptr, 0, Alq, Ald,
459 size ), &Ald,
Mptr( WB, 0, Alq, WBld, size ),
460 &WBld, one, WC, &WCld );
463 talpha, Aptr, l, l, Ad0,
Mptr( WB, 0, Alq, WBld,
464 size ), WBld,
Mptr( WC, Alp, 0, WCld, size ),
470 for( l = 0; l < An; l += Alcmb )
472 lb = An - l; lb =
MIN( lb, Alcmb );
473 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
474 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
475 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol );
478 &Alq0, &Alp, talpha, WB, &WBld,
Mptr( Aptr, 0,
479 Alq, Ald, size ), &Ald, one,
Mptr( WC, 0, Alq,
480 WCld, size ), &WCld );
482 talpha, Aptr, l, l, Ad0,
Mptr( WB, Alp, 0, WBld,
483 size ), WBld,
Mptr( WC, 0, Alq, WCld, size ),
495 for( l = 0; l < An; l += Alcmb )
497 lb = An - l; ltmp = l + ( lb =
MIN( lb, Alcmb ) );
498 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
499 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
501 talpha, Aptr, l, l, Ad0,
Mptr( WB, 0, Alq, WBld,
502 size ), WBld,
Mptr( WC, Alp, 0, WCld, size ),
504 Alp =
PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
507 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
511 &nbb, &Alq0, talpha,
Mptr( Aptr, Alp, Alq, Ald,
512 size ), &Ald,
Mptr( WB, 0, Alq, WBld, size ),
513 &WBld, one,
Mptr( WC, Alp, 0, WCld, size ),
519 for( l = 0; l < An; l += Alcmb )
521 lb = An - l; ltmp = l + ( lb =
MIN( lb, Alcmb ) );
522 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
523 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
525 talpha, Aptr, l, l, Ad0,
Mptr( WB, Alp, 0, WBld,
526 size ), WBld,
Mptr( WC, 0, Alq, WCld, size ),
528 Alp =
PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow,
531 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol,
535 &Alq0, &Alp0, talpha,
Mptr( WB, Alp, 0, WBld,
536 size ), &WBld,
Mptr( Aptr, Alp, Alq, Ald, size ),
537 &Ald, one,
Mptr( WC, 0, Alq, WCld, size ),
543 if( WBfr ) free( WB );
550 if( WCsum && ( Amp > 0 ) )
551 gsum2d( ctxt,
ROW, &top, Amp, nbb, WC, WCld, myrow, WCd[
CSRC_] );
556 PB_Cpaxpby(
TYPE, &conjg, An, nbb, one, WC, 0, 0, WCd, &WCroc,
557 zero, Bptr, 0, 0, DBUFB, &Broc );
564 if( WCsum && ( Anq > 0 ) )
565 gsum2d( ctxt,
COLUMN, &top, nbb, Anq, WC, WCld, WCd[
RSRC_],
571 PB_Cpaxpby(
TYPE, &conjg, nbb, An, one, WC, 0, 0, WCd, &WCroc,
572 zero, Bptr, 0, 0, DBUFB, &Broc );
574 if( WCfr ) free( WC );
580 if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb;
589 if( TranOp ==
CCOTRAN ) free( talpha );