14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * UPLO,
char * TRANS,
int N,
int K,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
char * B,
23 int IB,
int JB,
int * DESCB,
char * BETA,
char * C,
24 int IC,
int JC,
int * DESCC )
26 void PB_Cpsyr2kA(
TYPE, DIRECAB, CONJUG, UPLO, TRANS, N, K, ALPHA, A, IA,
27 JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC )
31 char * CONJUG, * DIRECAB, * TRANS, * UPLO;
32 int IA, IB, IC, JA, JB, JC, K, N;
38 int * DESCA, * DESCB, * DESCC;
320 char * one, * talpha, * zero;
321 int ABfwd, ABmyprocD, ABmyprocR, ABnprocsD, ABnprocsR, ABrocs,
322 Abufld, AcurrocR, Afr, AiD, AiR, AiiD, AiiR, AinbD, AinbR,
323 Ainb1D, Ainb1R, AisR, AkkR, Ald, AnbD, AnbR, AnpD, AnpR,
324 Aoff, ArocD, ArocR, AsrcR, Bbufld, BcurrocR, Bfr, BiD, BiR,
325 BiiD, BiiR, BinbD, BinbR, Binb1D, Binb1R, BisR, BkkR, Bld,
327 BnpR, Boff, BrocD, BrocR, BsrcR, Ccol, Cii, Cimb1,
328 Cinb1, Cjj, Clcmb, Cld, Clp, Clq, Cnq0, Cmb, Cmp, Cmp0,
330 WACfr, WACld, WACsum, WARfr, WARld, WARsum,
331 WBCfr, WBCld, WBCsum, WBRfr, WBRld, WBRsum, Wkbb=0,
332 conjg, ctxt, k, kb, kbb, l, lb,
333 lcmb, ltmp, maxp, maxpm1, maxq, mycol, myrow, ncpq, notran,
334 npcol, npq, nprow, nrpq, p=0, q=0, size, tmp, upper;
343 char * Abuf = NULL, * Bbuf = NULL, * Cptr = NULL, * WAC = NULL,
344 * WAR = NULL, * WBC = NULL, * WBR = NULL;
365 PB_Cdescribe( N, N, IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
366 &Cld, &Cimb1, &Cinb1, &Cmb, &Cnb, &Crow, &Ccol, Cd0 );
368 Cmp =
PB_Cnumroc( N, 0, Cimb1, Cmb, myrow, Crow, nprow );
369 Cnq =
PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
371 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
373 Cptr =
Mptr( C, Cii, Cjj, Cld, size );
385 Clcmb = 2 * kb *
PB_Clcm( ( Crow >= 0 ? nprow : 1 ),
386 ( Ccol >= 0 ? npcol : 1 ) );
394 AiR = JA; AinbR = DESCA[
INB_]; AnbR = DESCA[
NB_]; AsrcR = DESCA[
CSRC_];
395 BiR = JB; BinbR = DESCB[
INB_]; BnbR = DESCB[
NB_]; BsrcR = DESCB[
CSRC_];
400 AiR = IA; AinbR = DESCA[
IMB_]; AnbR = DESCA[
MB_]; AsrcR = DESCA[
RSRC_];
401 BiR = IB; BinbR = DESCB[
IMB_]; BnbR = DESCB[
MB_]; BsrcR = DESCB[
RSRC_];
407 if( !(
PB_Cspan( K, AiR, AinbR, AnbR, AsrcR, ABnprocsR ) ) &&
408 !(
PB_Cspan( K, BiR, BinbR, BnbR, BsrcR, ABnprocsR ) ) )
416 PB_CInV(
TYPE,
NOCONJG,
COLUMN, N, N, Cd0, K, A, IA, JA, DESCA,
COLUMN,
417 &WAC, WACd0, &WACfr );
418 PB_CInV(
TYPE, CONJUG,
ROW, N, N, Cd0, K, WAC, 0, 0, WACd0,
COLUMN,
419 &WAR, WARd0, &WARfr );
421 PB_CInV(
TYPE,
NOCONJG,
COLUMN, N, N, Cd0, K, B, IB, JB, DESCB,
COLUMN,
422 &WBC, WBCd0, &WBCfr );
423 PB_CInV(
TYPE, CONJUG,
ROW, N, N, Cd0, K, WBC, 0, 0, WBCd0,
COLUMN,
424 &WBR, WBRd0, &WBRfr );
428 PB_CInV(
TYPE,
NOCONJG,
ROW, N, N, Cd0, K, A, IA, JA, DESCA,
ROW,
429 &WAR, WARd0, &WARfr );
430 PB_CInV(
TYPE, CONJUG,
COLUMN, N, N, Cd0, K, WAR, 0, 0, WARd0,
ROW,
431 &WAC, WACd0, &WACfr );
433 PB_CInV(
TYPE,
NOCONJG,
ROW, N, N, Cd0, K, B, IB, JB, DESCB,
ROW,
434 &WBR, WBRd0, &WBRfr );
435 PB_CInV(
TYPE, CONJUG,
COLUMN, N, N, Cd0, K, WBR, 0, 0, WBRd0,
ROW,
436 &WBC, WBCd0, &WBCfr );
441 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
443 WACld = WACd0[
LLD_]; WBCld = WBCd0[
LLD_];
444 WARld = WARd0[
LLD_]; WBRld = WBRd0[
LLD_];
448 for( l = 0; l < N; l += Clcmb )
450 lb = N - l; lb =
MIN( lb, Clcmb );
451 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
452 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
453 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
454 if( Clp > 0 && Cnq0 > 0 )
457 ALPHA, WAC, &WACld,
Mptr( WBR, 0, Clq, WBRld, size ),
458 &WBRld, one,
Mptr( Cptr, 0, Clq, Cld, size ), &Cld );
460 talpha, WBC, &WBCld,
Mptr( WAR, 0, Clq, WARld, size ),
461 &WARld, one,
Mptr( Cptr, 0, Clq, Cld, size ), &Cld );
464 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ),
465 WARld,
Mptr( WBC, Clp, 0, WBCld, size ), WBCld,
466 Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l,
472 for( l = 0; l < N; l += Clcmb )
474 lb = N - l; ltmp = l + ( lb =
MIN( lb, Clcmb ) );
475 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
476 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
478 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ),
479 WARld,
Mptr( WBC, Clp, 0, WBCld, size ), WBCld,
480 Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l,
482 Clp =
PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
484 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
485 if( Cmp0 > 0 && Cnq0 > 0 )
488 &K, ALPHA,
Mptr( WAC, Clp, 0, WACld, size ), &WACld,
489 Mptr( WBR, 0, Clq, WBRld, size ), &WBRld, one,
490 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
492 &K, talpha,
Mptr( WBC, Clp, 0, WBCld, size ), &WBCld,
493 Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
494 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
498 if( conjg ) free( talpha );
501 if( WACfr ) free( WAC );
502 if( WARfr ) free( WAR );
503 if( WBCfr ) free( WBC );
504 if( WBRfr ) free( WBR );
516 ABmyprocD = myrow; ABmyprocR = mycol; ABnprocsD = nprow;
517 AiD = IA; AinbD = DESCA[
IMB_]; AnbD = DESCA[
MB_]; Ald = DESCA[
LLD_];
518 BiD = IB; BinbD = DESCB[
IMB_]; BnbD = DESCB[
MB_]; Bld = DESCB[
LLD_];
519 PB_Cinfog2l( IA, JA, DESCA, ABnprocsD, ABnprocsR, ABmyprocD, ABmyprocR,
520 &AiiD, &AiiR, &ArocD, &ArocR );
521 PB_Cinfog2l( IB, JB, DESCB, ABnprocsD, ABnprocsR, ABmyprocD, ABmyprocR,
522 &BiiD, &BiiR, &BrocD, &BrocR );
526 ABmyprocD = mycol; ABmyprocR = myrow; ABnprocsD = npcol;
527 AiD = JA; AinbD = DESCA[
INB_]; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
528 BiD = JB; BinbD = DESCB[
INB_]; BnbD = DESCB[
NB_]; Bld = DESCB[
LLD_];
529 PB_Cinfog2l( IA, JA, DESCA, ABnprocsR, ABnprocsD, ABmyprocR, ABmyprocD,
530 &AiiR, &AiiD, &ArocR, &ArocD );
531 PB_Cinfog2l( IB, JB, DESCB, ABnprocsR, ABnprocsD, ABmyprocR, ABmyprocD,
532 &BiiR, &BiiD, &BrocR, &BrocD );
535 AnpD =
PB_Cnumroc( N, 0, Ainb1D, AnbD, ABmyprocD, ArocD, ABnprocsD );
537 AisR = ( ( AsrcR < 0 ) || ( ABnprocsR == 1 ) );
540 BnpD =
PB_Cnumroc( N, 0, Binb1D, BnbD, ABmyprocD, BrocD, ABnprocsD );
542 BisR = ( ( BsrcR < 0 ) || ( ABnprocsR == 1 ) );
547 if( !( AisR ) && !( ABfwd ) )
549 tmp =
PB_Cindxg2p( K - 1, Ainb1R, AnbR, ArocR, ArocR, ABnprocsR );
550 q =
MModSub( tmp, ArocR, ABnprocsR );
556 if( !( BisR ) && !( ABfwd ) )
558 tmp =
PB_Cindxg2p( K - 1, Binb1R, BnbR, BrocR, BrocR, ABnprocsR );
559 p =
MModSub( tmp, BrocR, ABnprocsR );
564 PB_COutV(
TYPE,
COLUMN,
NOINIT, N, N, Cd0, kb, &WAC, WACd0, &WACfr,
566 PB_COutV(
TYPE,
ROW,
NOINIT, N, N, Cd0, kb, &WAR, WARd0, &WARfr,
568 PB_COutV(
TYPE,
COLUMN,
NOINIT, N, N, Cd0, kb, &WBC, WBCd0, &WBCfr,
570 PB_COutV(
TYPE,
ROW,
NOINIT, N, N, Cd0, kb, &WBR, WBRd0, &WBRfr,
576 lcmb =
PB_Clcm( ( maxp = ( BisR ? 1 : ABnprocsR ) ) * BnbR,
577 ( maxq = ( AisR ? 1 : ABnprocsR ) ) * AnbR );
582 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, ABnprocsR ) );
583 AkkR =
PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR, ABnprocsR );
584 AnpR =
PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR, ABnprocsR );
586 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, ABnprocsR ) );
587 BkkR =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR, ABnprocsR );
588 BnpR =
PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR, ABnprocsR );
592 PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR, p, q,
596 for( k = 0; k < K; k += kb )
598 kbb = K - k; kbb =
MIN( kbb, kb );
608 if( ( ABfwd && ( p == maxpm1 ) ) ||
609 ( !( ABfwd ) && ( p == 0 ) ) )
613 AcurrocR = ( AisR ? -1 :
MModAdd( ArocR, q, ABnprocsR ) );
614 AkkR =
PB_Cg2lrem( AiR, AinbR, AnbR, AcurrocR, AsrcR,
616 AnpR =
PB_Cnumroc( K, 0, Ainb1R, AnbR, AcurrocR, ArocR,
619 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, ABnprocsR ) );
620 BkkR =
PB_Cg2lrem( BiR, BinbR, BnbR, BcurrocR, BsrcR,
622 BnpR =
PB_Cnumroc( K, 0, Binb1R, BnbR, BcurrocR, BrocR,
625 PB_CVMinit( &VM, 0, BnpR, AnpR, Binb1R, Ainb1R, BnbR, AnbR,
626 p, q, maxp, maxq, lcmb );
633 if( Wkbb == 0 ) { ABrocs = ( npq < kbb ? npq : kbb ); }
634 else { ABrocs = kbb - Wkbb; ABrocs =
MIN( ABrocs, npq ); }
646 if( ( Afr = ( ncpq < ABrocs ) ) != 0 )
652 Abufld =
MAX( 1, AnpD );
653 if( AisR || ( ABmyprocR == AcurrocR ) )
657 ABrocs, AnpD, one,
Mptr( A, AiiD, AkkR, Ald,
658 size ), Ald, zero, Abuf, Abufld );
667 if( AisR || ( ABmyprocR == AcurrocR ) )
668 Abuf =
Mptr( A, AiiD, AkkR + Aoff, Ald, size );
670 PB_Cdescset( DBUFA, N, ABrocs, Ainb1D, ABrocs, AnbD, ABrocs,
671 ArocD, AcurrocR, ctxt, Abufld );
675 PB_CInV2(
TYPE,
NOCONJG,
COLUMN, N, N, Cd0, ABrocs, Abuf, 0, 0,
676 DBUFA,
COLUMN, WAC, Wkbb, WACd0 );
677 if( Afr & ( AisR || ( ABmyprocR == AcurrocR ) ) )
678 if( Abuf ) free( Abuf );
684 if( ( Bfr = ( nrpq < ABrocs ) ) != 0 )
690 Bbufld =
MAX( 1, BnpD );
691 if( BisR || ( ABmyprocR == BcurrocR ) )
695 ABrocs, BnpD, one,
Mptr( B, BiiD, BkkR, Bld,
696 size ), Bld, zero, Bbuf, Bbufld );
705 if( BisR || ( ABmyprocR == BcurrocR ) )
706 Bbuf =
Mptr( B, BiiD, BkkR + Boff, Bld, size );
708 PB_Cdescset( DBUFB, N, ABrocs, Binb1D, ABrocs, BnbD, ABrocs,
709 BrocD, BcurrocR, ctxt, Bbufld );
713 PB_CInV2(
TYPE,
NOCONJG,
COLUMN, N, N, Cd0, ABrocs, Bbuf, 0, 0,
714 DBUFB,
COLUMN, WBC, Wkbb, WBCd0 );
715 if( Bfr & ( BisR || ( ABmyprocR == BcurrocR ) ) )
716 if( Bbuf ) free( Bbuf );
724 if( ( Afr = ( ncpq < ABrocs ) ) != 0 )
731 if( AisR || ( ABmyprocR == AcurrocR ) )
735 ABrocs, AnpD, one,
Mptr( A, AkkR, AiiD, Ald,
736 size ), Ald, zero, Abuf, Abufld );
745 if( AisR || ( ABmyprocR == AcurrocR ) )
746 Abuf =
Mptr( A, AkkR + Aoff, AiiD, Ald, size );
748 PB_Cdescset( DBUFA, ABrocs, N, ABrocs, Ainb1D, ABrocs, AnbD,
749 AcurrocR, ArocD, ctxt, Abufld );
753 PB_CInV2(
TYPE,
NOCONJG,
ROW, N, N, Cd0, ABrocs, Abuf, 0, 0,
754 DBUFA,
ROW, WAR, Wkbb, WARd0 );
755 if( Afr & ( AisR || ( ABmyprocR == AcurrocR ) ) )
756 if( Abuf ) free( Abuf );
761 if( ( Bfr = ( nrpq < ABrocs ) ) != 0 )
768 if( BisR || ( ABmyprocR == BcurrocR ) )
772 ABrocs, BnpD, one,
Mptr( B, BkkR, BiiD, Bld,
773 size ), Bld, zero, Bbuf, Bbufld );
782 if( BisR || ( ABmyprocR == BcurrocR ) )
783 Bbuf =
Mptr( B, BkkR + Boff, BiiD, Bld, size );
785 PB_Cdescset( DBUFB, ABrocs, N, ABrocs, Binb1D, ABrocs, BnbD,
786 BcurrocR, BrocD, ctxt, Bbufld );
790 PB_CInV2(
TYPE,
NOCONJG,
ROW, N, N, Cd0, ABrocs, Bbuf, 0, 0,
791 DBUFB,
ROW, WBR, Wkbb, WBRd0 );
792 if( Bfr & ( BisR || ( ABmyprocR == BcurrocR ) ) )
793 if( Bbuf ) free( Bbuf );
813 PB_CInV2(
TYPE, CONJUG,
ROW, N, N, Cd0, kbb, WAC, 0, 0, WACd0,
818 PB_CInV2(
TYPE, CONJUG,
ROW, N, N, Cd0, kbb, WBC, 0, 0, WBCd0,
826 PB_CInV2(
TYPE, CONJUG,
COLUMN, N, N, Cd0, kbb, WAR, 0, 0, WARd0,
827 ROW, WAC, 0, WACd0 );
831 PB_CInV2(
TYPE, CONJUG,
COLUMN, N, N, Cd0, kbb, WBR, 0, 0, WBRd0,
832 ROW, WBC, 0, WBCd0 );
837 if( ( Cmp > 0 ) && ( Cnq > 0 ) )
839 WACld = WACd0[
LLD_]; WBCld = WBCd0[
LLD_];
840 WARld = WARd0[
LLD_]; WBRld = WBRd0[
LLD_];
844 for( l = 0; l < N; l += Clcmb )
846 lb = N - l; lb =
MIN( lb, Clcmb );
847 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
848 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
849 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
850 if( Clp > 0 && Cnq0 > 0 )
853 &kbb, ALPHA, WAC, &WACld,
Mptr( WBR, 0, Clq, WBRld,
854 size ), &WBRld, one,
Mptr( Cptr, 0, Clq, Cld, size ),
857 &kbb, talpha, WBC, &WBCld,
Mptr( WAR, 0, Clq, WARld,
858 size ), &WARld, one,
Mptr( Cptr, 0, Clq, Cld, size ),
862 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ),
863 WARld,
Mptr( WBC, Clp, 0, WBCld, size ), WBCld,
864 Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l,
870 for( l = 0; l < N; l += Clcmb )
872 lb = N - l; ltmp = l + ( lb =
MIN( lb, Clcmb ) );
873 Clp =
PB_Cnumroc( l, 0, Cimb1, Cmb, myrow, Crow, nprow );
874 Clq =
PB_Cnumroc( l, 0, Cinb1, Cnb, mycol, Ccol, npcol );
877 size ), WACld,
Mptr( WAR, 0, Clq, WARld, size ),
878 WARld,
Mptr( WBC, Clp, 0, WBCld, size ), WBCld,
879 Mptr( WBR, 0, Clq, WBRld, size ), WBRld, Cptr, l, l,
881 Clp =
PB_Cnumroc( ltmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
883 Cnq0 =
PB_Cnumroc( lb, l, Cinb1, Cnb, mycol, Ccol, npcol );
884 if( Cmp0 > 0 && Cnq0 > 0 )
887 &kbb, ALPHA,
Mptr( WAC, Clp, 0, WACld, size ), &WACld,
888 Mptr( WBR, 0, Clq, WBRld, size ), &WBRld, one,
889 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
891 &kbb, talpha,
Mptr( WBC, Clp, 0, WBCld, size ), &WBCld,
892 Mptr( WAR, 0, Clq, WARld, size ), &WARld, one,
893 Mptr( Cptr, Clp, Clq, Cld, size ), &Cld );
902 if( WACfr ) free( WAC );
903 if( WARfr ) free( WAR );
904 if( WBCfr ) free( WBC );
905 if( WBRfr ) free( WBR );
907 if( conjg && ( Cmp > 0 ) && ( Cnq > 0 ) ) free( talpha );