14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * SIDE,
char * UPLO,
int M,
int N,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
char * B,
23 int IB,
int JB,
int * DESCB,
char * BETA,
char * C,
24 int IC,
int JC,
int * DESCC )
26 void PB_CpsymmBC(
TYPE, DIRECAB, CONJUG, SIDE, UPLO, M, N, ALPHA, A, IA,
27 JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC )
31 char * CONJUG, * DIRECAB, * SIDE, * UPLO;
32 int IA, IB, IC, JA, JB, JC, M, N;
38 int * DESCA, * DESCB, * DESCC;
282 char GemmTa, GemmTb, cctop, * one, rctop, * talphaCR, * talphaRC,
284 int Acol, Aii, Aimb1, Ainb1, Ajj, Alcmb, Ald, Alp, Alp0, Alq,
285 Alq0, Amb, Amp, An, Anb, Anq, Arow, BCfwd, BCmyprocD,
286 BCmyprocR, BCnD, BCnR, BCnprocsD, BCnprocsR, Bbufld, BcurrocR,
287 Bfr, BiD, BiR, BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR,
288 Bkk, Bld, BnbD, BnbR, BnpD, BnpR, Boff, BrocD, BrocR, BsrcR,
289 Cfr, CiD, CiR, CiiD, CiiR, CinbD, CinbR, Cinb1D, Cinb1R, Ckk,
290 CnbD, CnbR, CnpD, CnpR, Coff, CrocD, CrocR, CsrcR, Cbufld,
291 CcurrocR, CisR, Cld, WBCfr, WBCld, WBRfr, WBRld, WCCfr, WCCld,
292 WCCsum, WCRfr, WCRld, WCRsum, conjg, ctxt, l, lb, lcmb, lside,
293 ltmp, maxp, maxpm1, maxq, mycol, myrow, n, nb, nbb, ncpq,
294 npcol, npq=0, nprow, nrpq, p=0, q=0, size, tmp, upper;
304 char * Aptr = NULL, * Bbuf = NULL, * Cbuf = NULL, * WBC = NULL,
305 * WBR = NULL, * WCC = NULL, * WCR = NULL;
318 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
325 BCnD = An = M; BCnR = N;
326 BCmyprocD = myrow; BCnprocsD = nprow;
327 BCmyprocR = mycol; BCnprocsR = npcol;
330 BinbD = DESCB[
IMB_ ]; BinbR = DESCB[
INB_];
331 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
333 PB_Cinfog2l( IB, JB, DESCB, BCnprocsD, BCnprocsR, BCmyprocD, BCmyprocR,
334 &BiiD, &BiiR, &BrocD, &BrocR );
337 CinbD = DESCC[
IMB_ ]; CinbR = DESCC[
INB_];
338 CnbD = DESCC[
MB_ ]; CnbR = DESCC[
NB_ ];
340 PB_Cinfog2l( IC, JC, DESCC, BCnprocsD, BCnprocsR, BCmyprocD, BCmyprocR,
341 &CiiD, &CiiR, &CrocD, &CrocR );
345 BCnD = An = N; BCnR = M;
346 BCmyprocD = mycol; BCnprocsD = npcol;
347 BCmyprocR = myrow; BCnprocsR = nprow;
350 BinbR = DESCB[
IMB_ ]; BinbD = DESCB[
INB_];
351 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
353 PB_Cinfog2l( IB, JB, DESCB, BCnprocsR, BCnprocsD, BCmyprocR, BCmyprocD,
354 &BiiR, &BiiD, &BrocR, &BrocD );
357 CinbR = DESCC[
IMB_ ]; CinbD = DESCC[
INB_];
358 CnbR = DESCC[
MB_ ]; CnbD = DESCC[
NB_ ];
360 PB_Cinfog2l( IC, JC, DESCC, BCnprocsR, BCnprocsD, BCmyprocR, BCmyprocD,
361 &CiiR, &CiiD, &CrocR, &CrocD );
365 BnpD =
PB_Cnumroc( BCnD, 0, Binb1D, BnbD, BCmyprocD, BrocD, BCnprocsD );
367 BisR = ( ( BsrcR < 0 ) || ( BCnprocsR == 1 ) );
370 CnpD =
PB_Cnumroc( BCnD, 0, Cinb1D, CnbD, BCmyprocD, CrocD, BCnprocsD );
372 CisR = ( ( CsrcR < 0 ) || ( BCnprocsR == 1 ) );
376 PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
377 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
379 Amp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
380 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
381 if( ( Amp > 0 ) && ( Anq > 0 ) ) Aptr =
Mptr( A, Aii, Ajj, Ald, size );
408 tzsymm =
PB_Ctzsymm; talphaCR = talphaRC = ALPHA;
415 Alcmb = 2 * nb *
PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
416 ( Acol >= 0 ? npcol : 1 ) );
421 if( !( BisR ) && !( BCfwd ) )
423 tmp =
PB_Cindxg2p( BCnR - 1, Binb1R, BnbR, BrocR, BrocR, BCnprocsR );
424 q =
MModSub( tmp, BrocR, BCnprocsR );
430 if( !( CisR ) && !( BCfwd ) )
432 tmp =
PB_Cindxg2p( BCnR - 1, Cinb1R, CnbR, CrocR, CrocR, BCnprocsR );
433 p =
MModSub( tmp, CrocR, BCnprocsR );
439 lcmb =
PB_Clcm( ( maxp = ( CisR ? 1 : BCnprocsR ) ) * CnbR,
440 ( maxq = ( BisR ? 1 : BCnprocsR ) ) * BnbR );
449 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, q, BCnprocsR ) );
450 Bkk =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, BCnprocsR );
451 BnpR =
PB_Cnumroc( BCnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BCnprocsR );
453 CcurrocR = ( CisR ? -1 :
MModAdd( CrocR, p, BCnprocsR ) );
454 Ckk =
PB_Cg2lrem( CiR, CinbR, CnbR, CcurrocR, CsrcR, BCnprocsR );
455 CnpR =
PB_Cnumroc( BCnR, 0, Cinb1R, CnbR, CcurrocR, CrocR, BCnprocsR );
457 PB_CVMinit( &VM, 0, CnpR, BnpR, Cinb1R, Binb1R, CnbR, BnbR, p, q,
469 if( npq ) nbb = npq / ( ( npq - 1 ) / nb + 1 );
473 nbb =
MIN( nbb, npq );
485 if( ( Bfr = ( ncpq < nbb ) ) != 0 )
491 Bbufld =
MAX( 1, BnpD );
492 if( BisR || ( BCmyprocR == BcurrocR ) )
496 BnpD, one,
Mptr( B, BiiD, Bkk, Bld, size ), Bld,
497 zero, Bbuf, Bbufld );
506 if( BisR || ( BCmyprocR == BcurrocR ) )
507 Bbuf =
Mptr( B, BiiD, Bkk+Boff, Bld, size );
509 PB_Cdescset( DBUFB, BCnD, nbb, Binb1D, nbb, BnbD, nbb, BrocD,
510 BcurrocR, ctxt, Bbufld );
515 PB_CInV(
TYPE,
NOCONJG,
COLUMN, An, An, Ad0, nbb, Bbuf, 0, 0,
516 DBUFB,
COLUMN, &WBC, WBCd, &WBCfr );
517 PB_CInV(
TYPE,
NOCONJG,
ROW, An, An, Ad0, nbb, WBC, 0, 0,
518 WBCd,
COLUMN, &WBR, WBRd, &WBRfr );
526 if( ( Bfr = ( ncpq < nbb ) ) != 0 )
533 if( BisR || ( BCmyprocR == BcurrocR ) )
537 BnpD, one,
Mptr( B, Bkk, BiiD, Bld, size ), Bld,
538 zero, Bbuf, Bbufld );
547 if( BisR || ( BCmyprocR == BcurrocR ) )
548 Bbuf =
Mptr( B, Bkk+Boff, BiiD, Bld, size );
550 PB_Cdescset( DBUFB, nbb, BCnD, nbb, Binb1D, nbb, BnbD, BcurrocR,
551 BrocD, ctxt, Bbufld );
556 PB_CInV(
TYPE,
NOCONJG,
ROW, An, An, Ad0, nbb, Bbuf, 0, 0,
557 DBUFB,
ROW, &WBR, WBRd, &WBRfr );
558 PB_CInV(
TYPE,
NOCONJG,
COLUMN, An, An, Ad0, nbb, WBR, 0, 0,
559 WBRd,
ROW, &WBC, WBCd, &WBCfr );
564 PB_COutV(
TYPE,
COLUMN,
INIT, An, An, Ad0, nbb, &WCC, WCCd, &WCCfr,
566 PB_COutV(
TYPE,
ROW,
INIT, An, An, Ad0, nbb, &WCR, WCRd, &WCRfr,
571 WBCld = WBCd[
LLD_]; WBRld = WBRd[
LLD_];
572 WCCld = WCCd[
LLD_]; WCRld = WCRd[
LLD_];
574 if( ( Amp > 0 ) && ( Anq > 0 ) )
581 for( l = 0; l < An; l += Alcmb )
583 lb = An - l; lb =
MIN( lb, Alcmb );
584 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
585 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
586 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol );
587 if( Alp > 0 && Alq0 > 0 )
590 &Alq0, talphaRC,
Mptr( Aptr, 0, Alq, Ald, size ),
591 &Ald,
Mptr( WBR, 0, Alq, WBRld, size ), &WBRld, one,
594 &Alp, talphaCR, WBC, &WBCld,
Mptr( Aptr, 0, Alq, Ald,
595 size ), &Ald, one,
Mptr( WCR, 0, Alq, WCRld, size ),
598 PB_Cpsym(
TYPE,
TYPE, SIDE,
UPPER, lb, nbb, ALPHA, Aptr, l, l,
599 Ad0,
Mptr( WBC, Alp, 0, WBCld, size ), WBCld,
600 Mptr( WBR, 0, Alq, WBRld, size ), WBRld,
Mptr( WCC,
601 Alp, 0, WCCld, size ), WCCld,
Mptr( WCR, 0, Alq,
602 WCRld, size ), WCRld, tzsymm );
610 for( l = 0; l < An; l += Alcmb )
612 lb = An - l; ltmp = l + ( lb =
MIN( lb, Alcmb ) );
613 Alp =
PB_Cnumroc( l, 0, Aimb1, Amb, myrow, Arow, nprow );
614 Alq =
PB_Cnumroc( l, 0, Ainb1, Anb, mycol, Acol, npcol );
615 PB_Cpsym(
TYPE,
TYPE, SIDE,
LOWER, lb, nbb, ALPHA, Aptr, l, l,
616 Ad0,
Mptr( WBC, Alp, 0, WBCld, size ), WBCld,
617 Mptr( WBR, 0, Alq, WBRld, size ), WBRld,
Mptr( WCC,
618 Alp, 0, WCCld, size ), WCCld,
Mptr( WCR, 0, Alq,
619 WCRld, size ), WCRld, tzsymm );
620 Alp =
PB_Cnumroc( ltmp, 0, Aimb1, Amb, myrow, Arow, nprow );
622 Alq0 =
PB_Cnumroc( lb, l, Ainb1, Anb, mycol, Acol, npcol );
623 if( Alp0 > 0 && Alq0 > 0 )
626 &Alq0, talphaRC,
Mptr( Aptr, Alp, Alq, Ald, size ),
627 &Ald,
Mptr( WBR, 0, Alq, WBRld, size ), &WBRld, one,
628 Mptr( WCC, Alp, 0, WCCld, size ), &WCCld );
630 &Alp0, talphaCR,
Mptr( WBC, Alp, 0, WBCld, size ),
631 &WBCld,
Mptr( Aptr, Alp, Alq, Ald, size ), &Ald, one,
632 Mptr( WCR, 0, Alq, WCRld, size ), &WCRld );
637 if( WBCfr ) free( WBC );
638 if( WBRfr ) free( WBR );
640 if( Bfr && ( BisR || ( BCmyprocR == BcurrocR ) ) )
641 if( Bbuf ) free( Bbuf );
650 WCCd[
CSRC_] = CcurrocR;
652 gsum2d( ctxt,
ROW, &rctop, Amp, nbb, WCC, WCCld, myrow,
660 gsum2d( ctxt,
COLUMN, &cctop, nbb, Anq, WCR, WCRld,
661 WCRd[
RSRC_], mycol );
666 PB_Cpaxpby(
TYPE, CONJUG, nbb, An, one, WCR, 0, 0, WCRd,
ROW, one,
667 WCC, 0, 0, WCCd,
COLUMN );
668 if( WCRfr ) free( WCR );
673 if( ( Cfr = ( nrpq < nbb ) ) != 0 )
678 Cbufld =
MAX( 1, CnpD ); tbeta = zero;
679 if( CisR || ( BCmyprocR == CcurrocR ) )
687 Cbufld = Cld; tbeta = BETA;
688 if( CisR || ( BCmyprocR == CcurrocR ) )
689 Cbuf =
Mptr( C, CiiD, Ckk+Coff, Cld, size );
691 PB_Cdescset( DBUFC, BCnD, nbb, Cinb1D, nbb, CnbD, nbb, CrocD,
692 CcurrocR, ctxt, Cbufld );
696 PB_Cpaxpby(
TYPE,
NOCONJG, An, nbb, one, WCC, 0, 0, WCCd,
COLUMN,
697 tbeta, Cbuf, 0, 0, DBUFC,
COLUMN );
698 if( WCCfr ) free( WCC );
702 if( Cfr && ( CisR || ( BCmyprocR == CcurrocR ) ) )
705 CnpD, BETA,
Mptr( C, CiiD, Ckk, Cld, size ), Cld,
707 if( Cbuf ) free( Cbuf );
719 gsum2d( ctxt,
ROW, &rctop, Amp, nbb, WCC, WCCld, myrow,
725 WCRd[
RSRC_] = CcurrocR;
727 gsum2d( ctxt,
COLUMN, &cctop, nbb, Anq, WCR, WCRld,
728 WCRd[
RSRC_], mycol );
733 PB_Cpaxpby(
TYPE, CONJUG, An, nbb, one, WCC, 0, 0, WCCd,
COLUMN,
734 one, WCR, 0, 0, WCRd,
ROW );
735 if( WCCfr ) free( WCC );
740 if( ( Cfr = ( nrpq < nbb ) ) != 0 )
745 Cbufld = nbb; tbeta = zero;
746 if( CisR || ( BCmyprocR == CcurrocR ) )
754 Cbufld = Cld; tbeta = BETA;
755 if( CisR || ( BCmyprocR == CcurrocR ) )
756 Cbuf =
Mptr( C, Ckk+Coff, CiiD, Cld, size );
758 PB_Cdescset( DBUFC, nbb, BCnD, nbb, Cinb1D, nbb, CnbD, CcurrocR,
759 CrocD, ctxt, Cbufld );
763 PB_Cpaxpby(
TYPE,
NOCONJG, nbb, An, one, WCR, 0, 0, WCRd,
ROW,
764 tbeta, Cbuf, 0, 0, DBUFC,
ROW );
766 if( WCRfr ) free( WCR );
770 if( Cfr && ( CisR || ( BCmyprocR == CcurrocR ) ) )
773 CnpD, BETA,
Mptr( C, Ckk, CiiD, Cld, size ), Cld,
775 if( Cbuf ) free( Cbuf );
788 if( ( BCfwd && ( p == maxpm1 ) ) ||
789 ( !( BCfwd ) && ( p == 0 ) ) )
794 if( conjg ) free( ( lside ? talphaCR : talphaRC ) );