14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * UPLO,
char * TRANS,
int N,
int K,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
char * B,
23 int IB,
int JB,
int * DESCB,
char * BETA,
char * C,
24 int IC,
int JC,
int * DESCC )
26 void PB_Cpsyr2kAC(
TYPE, DIRECAB, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA,
27 JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC )
31 char * CONJUG, * DIRECAB, * TRANS, * UPLO;
32 int IA, IB, IC, JA, JB, JC, K, N;
38 int * DESCA, * DESCB, * DESCC;
319 char GatherDir, ScatterDir, * one, top, * talpha, * tbeta, tran,
321 int ABm, ABn, Acol, Acurcol, Acurrow, Acurimb1, Acurinb1, Afr,
322 Aii, Aimb, Aimb1, Ainb, Ainb1, AisD, AisR, Ajj, Ald, Amb,
323 Amp, Amp0, Anb, Anq, Anq0, Arow, Aspan, Bcol, Bcurcol,
324 Bcurrow, Bcurimb1, Bcurinb1, Bfr, Bii, Bimb, Bimb1, Binb,
325 Binb1, BisD, BisR, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq, Bnq0,
326 Brow, Bspan, Ccsrc, Cimb, Cinb, Cmb, Cnb, Crsrc, WAfr, WACfr,
327 WACld, WACreuse, WACsum, WBfr, WBCfr, WBCld, WBCsum, conjg,
328 ctxt, fwd, k, kb, kbb, kend, kstart, kstep, ktmp, mycol,
329 myrow, notran, npcol, nprow, size, upper;
335 char * Aptr = NULL, * Aptr0 = NULL, * Bptr = NULL, * Bptr0 = NULL,
336 * WA = NULL, * WB = NULL, * WAC = NULL, *WBC = NULL;
358 gsum2d =
TYPE->Cgsum2d; gemm =
TYPE->Fgemm;
365 kstart = 0; kend = ( ( N - 1 ) / kb + 1 ) * kb; kstep = kb;
370 kstart = ( ( N - 1 ) / kb ) * kb; kend = kstep = -kb;
381 else { tran =
CTRAN; talpha = ALPHA; }
385 if( notran ) { ABm = N; ABn = K; }
else { ABm = K; ABn = N; }
386 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
388 Aimb = DESCA[
IMB_]; Ainb = DESCA[
INB_];
389 Amb = DESCA[
MB_ ]; Anb = DESCA[
NB_ ]; Ald = DESCA[
LLD_];
391 Amp0 =
PB_Cnumroc( ABm, 0, Aimb1, Amb, myrow, Arow, nprow );
393 Anq0 =
PB_Cnumroc( ABn, 0, Ainb1, Anb, mycol, Acol, npcol );
394 if( ( Amp0 > 0 ) && ( Anq0 > 0 ) ) Aptr0 =
Mptr( A, Aii, Ajj, Ald, size );
396 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
398 Bimb = DESCB[
IMB_]; Binb = DESCB[
INB_];
399 Bmb = DESCB[
MB_ ]; Bnb = DESCB[
NB_ ]; Bld = DESCB[
LLD_];
401 Bmp0 =
PB_Cnumroc( ABm, 0, Bimb1, Bmb, myrow, Brow, nprow );
403 Bnq0 =
PB_Cnumroc( ABn, 0, Binb1, Bnb, mycol, Bcol, npcol );
404 if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 =
Mptr( B, Bii, Bjj, Bld, size );
409 Cinb = DESCC[
INB_]; Cnb = DESCC[
NB_]; Ccsrc = DESCC[
CSRC_];
414 AisR = ( ( Acol < 0 ) || ( npcol == 1 ) );
415 BisR = ( ( Bcol < 0 ) || ( npcol == 1 ) );
417 if( !( AisR ) && !( BisR ) )
425 Aspan =
PB_Cspan( ABn, 0, Ainb1, Anb, Acol, npcol );
426 Bspan =
PB_Cspan( ABn, 0, Binb1, Bnb, Bcol, npcol );
427 WACreuse = ( Aspan ||
428 ( !( Aspan ) && !( Bspan ) && ( Acol == Bcol ) ) );
436 WACreuse = ( AisR && BisR );
442 AisD = ( ( Arow >= 0 ) && ( nprow > 1 ) );
443 BisD = ( ( Brow >= 0 ) && ( nprow > 1 ) );
444 WACreuse = ( WACreuse &&
445 ( ( !AisD && !BisD ) || ( ( AisD && BisD ) &&
446 ( ( Arow == Brow ) &&
447 ( ( ( Aimb1 >= ABm ) && ( Bimb1 >= ABm ) ) ||
448 ( ( Aimb1 == Bimb1 ) && ( Amb == Bmb ) ) ) ) ) ) );
449 tbeta = ( WACreuse ? one : zero );
453 for( k = kstart; k != kend; k += kstep )
455 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
460 ROW, &Bptr, DBUFB, &Bfr );
464 PB_Cdescset( Ad0, ktmp, ABn, Aimb1, Ainb1, Amb, Anb, Arow, Acol,
466 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, ABn, Ad0, kbb, Bptr, 0, 0, DBUFB,
467 ROW, &WB, WBd, &WBfr );
474 Amp =
PB_Cnumroc( ktmp, 0, Aimb1, Amb, myrow, Arow, nprow );
475 if( ( Amp > 0 ) && ( Anq0 > 0 ) )
477 ALPHA, Aptr0, &Ald, WB, &WBd[
LLD_], zero, WAC, &WACld );
478 if( WBfr ) free( WB );
479 if( Bfr ) free( Bptr );
484 ROW, &Aptr, DBUFA, &Afr );
488 PB_Cdescset( Bd0, ktmp, ABn, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
490 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, ABn, Bd0, kbb, Aptr, 0, 0, DBUFA,
491 ROW, &WA, WAd, &WAfr );
496 { WBC = WAC;
MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; }
503 Bmp =
PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow );
504 if( ( Bmp > 0 ) && ( Bnq0 > 0 ) )
506 talpha, Bptr0, &Bld, WA, &WAd[
LLD_], tbeta, WBC, &WBCld );
507 if( WAfr ) free( WA );
508 if( Afr ) free( Aptr );
515 Cnb, Ccsrc, Ccsrc, npcol );
517 gsum2d( ctxt,
ROW, &top, Amp, kbb, WAC, WACld, myrow,
540 Cinb, Cnb, Ccsrc, Ccsrc,
544 gsum2d( ctxt,
ROW, &top, Bmp, kbb, WBC, WBCld, myrow,
561 one, C, IC, JC+k, DESCC,
COLUMN );
562 if( WACfr ) free( WAC );
570 if( WBCfr ) free( WBC );
576 for( k = kstart; k != kend; k += kstep )
578 ktmp = N - k; kbb =
MIN( ktmp, kb );
583 ROW, &Bptr, DBUFB, &Bfr );
588 Acurrow =
PB_Cindxg2p( k, Aimb1, Amb, Arow, Arow, nprow );
589 PB_Cdescset( Ad0, ktmp, ABn, Acurimb1, Ainb1, Amb, Anb, Acurrow,
591 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, ABn, Ad0, kbb, Bptr, 0, 0, DBUFB,
592 ROW, &WB, WBd, &WBfr );
599 Amp =
PB_Cnumroc( ktmp, k, Aimb1, Amb, myrow, Arow, nprow );
600 if( ( Amp > 0 ) && ( Anq0 > 0 ) )
602 ALPHA,
Mptr( Aptr0, Amp0-Amp, 0, Ald, size ), &Ald, WB,
603 &WBd[
LLD_], zero, WAC, &WACld );
604 if( WBfr ) free( WB );
605 if( Bfr ) free( Bptr );
610 ROW, &Aptr, DBUFA, &Afr );
615 Bcurrow =
PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow );
616 PB_Cdescset( Bd0, ktmp, ABn, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow,
618 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, ABn, Bd0, kbb, Aptr, 0, 0, DBUFA,
619 ROW, &WA, WAd, &WAfr );
624 { WBC = WAC;
MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; }
631 Bmp =
PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow );
632 if( ( Bmp > 0 ) && ( Bnq0 > 0 ) )
634 talpha,
Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld, WA,
635 &WAd[
LLD_], tbeta, WBC, &WBCld );
636 if( WAfr ) free( WA );
637 if( Afr ) free( Aptr );
644 Cnb, Ccsrc, Ccsrc, npcol );
646 gsum2d( ctxt,
ROW, &top, Amp, kbb, WAC, WACld, myrow,
669 Cinb, Cnb, Ccsrc, Ccsrc,
673 gsum2d( ctxt,
ROW, &top, Bmp, kbb, WBC, WBCld, myrow,
690 one, C, IC+k, JC+k, DESCC,
COLUMN );
691 if( WACfr ) free( WAC );
699 if( WBCfr ) free( WBC );
707 Cimb = DESCC[
IMB_]; Cmb = DESCC[
MB_]; Crsrc = DESCC[
RSRC_];
712 AisR = ( ( Arow < 0 ) || ( nprow == 1 ) );
713 BisR = ( ( Brow < 0 ) || ( nprow == 1 ) );
720 if( !( AisR ) && !( BisR ) )
722 Aspan =
PB_Cspan( ABm, 0, Aimb1, Amb, Arow, nprow );
723 Bspan =
PB_Cspan( ABm, 0, Bimb1, Bmb, Brow, nprow );
724 WACreuse = ( Aspan ||
725 ( !( Aspan ) && !( Bspan ) && ( Arow == Brow ) ) );
733 WACreuse = ( AisR && BisR );
739 AisD = ( ( Acol >= 0 ) && ( npcol > 1 ) );
740 BisD = ( ( Bcol >= 0 ) && ( npcol > 1 ) );
741 WACreuse = ( WACreuse &&
742 ( ( !AisD && !BisD ) || ( ( AisD && BisD ) &&
743 ( ( Acol == Bcol ) &&
744 ( ( ( Ainb1 >= ABn ) && ( Binb1 >= ABn ) ) ||
745 ( ( Ainb1 == Binb1 ) && ( Anb == Bnb ) ) ) ) ) ) );
746 tbeta = ( WACreuse ? one : zero );
750 for( k = kstart; k != kend; k += kstep )
752 ktmp = N - k; kbb =
MIN( ktmp, kb );
757 COLUMN, &Bptr, DBUFB, &Bfr );
762 Acurcol =
PB_Cindxg2p( k, Ainb1, Anb, Acol, Acol, npcol );
763 PB_Cdescset( Ad0, ABm, ktmp, Aimb1, Acurinb1, Amb, Anb, Arow,
764 Acurcol, ctxt, Ald );
765 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ABm, ktmp, Ad0, kbb, Bptr, 0, 0,
766 DBUFB,
COLUMN, &WB, WBd, &WBfr );
770 PB_COutV(
TYPE,
ROW,
INIT, ABm, ktmp, Ad0, kbb, &WAC, WACd, &WACfr,
773 Anq =
PB_Cnumroc( ktmp, k, Ainb1, Anb, mycol, Acol, npcol );
774 if( ( Anq > 0 ) && ( Amp0 > 0 ) )
776 talpha, WB, &WBd[
LLD_],
Mptr( Aptr0, 0, Anq0-Anq, Ald,
777 size ), &Ald, zero, WAC, &WACld );
778 if( WBfr ) free( WB );
779 if( Bfr ) free( Bptr );
784 COLUMN, &Aptr, DBUFA, &Afr );
789 Bcurcol =
PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol );
790 PB_Cdescset( Bd0, ABm, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow,
791 Bcurcol, ctxt, Bld );
792 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ABm, ktmp, Bd0, kbb, Aptr, 0, 0,
793 DBUFA,
COLUMN, &WA, WAd, &WAfr );
798 { WBC = WAC;
MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; }
805 Bnq =
PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol );
806 if( ( Bnq > 0 ) && ( Bmp0 > 0 ) )
808 ALPHA, WA, &WAd[
LLD_],
Mptr( Bptr0, 0, Bnq0-Bnq, Bld,
809 size ), &Bld, tbeta, WBC, &WBCld );
810 if( WAfr ) free( WA );
811 if( Afr ) free( Aptr );
818 Cmb, Crsrc, Crsrc, nprow );
820 gsum2d( ctxt,
COLUMN, &top, kbb, Anq, WAC, WACld, WACd[
RSRC_],
843 Cimb, Cmb, Crsrc, Crsrc,
847 gsum2d( ctxt,
COLUMN, &top, kbb, Bnq, WBC, WBCld,
848 WBCd[
RSRC_], mycol );
864 one, C, IC+k, JC+k, DESCC,
ROW );
865 if( WACfr ) free( WAC );
872 one, C, IC+k, JC+k, DESCC,
ROW );
873 if( WBCfr ) free( WBC );
879 for( k = kstart; k != kend; k += kstep )
881 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
886 COLUMN, &Bptr, DBUFB, &Bfr );
890 PB_Cdescset( Ad0, ABm, ktmp, Aimb1, Ainb1, Amb, Anb, Arow, Acol,
892 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ABm, ktmp, Ad0, kbb, Bptr, 0, 0,
893 DBUFB,
COLUMN, &WB, WBd, &WBfr );
897 PB_COutV(
TYPE,
ROW,
INIT, ABm, ktmp, Ad0, kbb, &WAC, WACd, &WACfr,
900 Anq =
PB_Cnumroc( ktmp, 0, Ainb1, Anb, mycol, Acol, npcol );
901 if( ( Anq > 0 ) && ( Amp0 > 0 ) )
903 talpha, WB, &WBd[
LLD_], Aptr0, &Ald, zero, WAC, &WACld );
904 if( WBfr ) free( WB );
905 if( Bfr ) free( Bptr );
910 COLUMN, &Aptr, DBUFA, &Afr );
914 PB_Cdescset( Bd0, ABm, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
916 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ABm, ktmp, Bd0, kbb, Aptr, 0, 0,
917 DBUFA,
COLUMN, &WA, WAd, &WAfr );
922 { WBC = WAC;
MDescCopy( WACd, WBCd ); WBCfr = 0; WBCsum = WACsum; }
929 Bnq =
PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol );
930 if( ( Bnq > 0 ) && ( Bmp0 > 0 ) )
932 ALPHA, WA, &WAd[
LLD_], Bptr0, &Bld, tbeta, WBC, &WBCld );
933 if( WAfr ) free( WA );
934 if( Afr ) free( Aptr );
941 Cmb, Crsrc, Crsrc, nprow );
943 gsum2d( ctxt,
COLUMN, &top, kbb, Anq, WAC, WACld, WACd[
RSRC_],
966 Cimb, Cmb, Crsrc, Crsrc,
970 gsum2d( ctxt,
COLUMN, &top, kbb, Bnq, WBC, WBCld,
971 WBCd[
RSRC_], mycol );
987 one, C, IC+k, JC, DESCC,
ROW );
988 if( WACfr ) free( WAC );
995 one, C, IC+k, JC, DESCC,
ROW );
996 if( WBCfr ) free( WBC );
1001 if( conjg ) free( talpha );