14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANSA,
char * DIAG,
int M,
int N,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
char * B,
int IB,
25 void PB_CptrmmAB(
TYPE, VARIANT, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, IA,
26 JA, DESCA, B, IB, JB, DESCB )
30 char * DIAG, * SIDE, * TRANSA, * UPLO, * VARIANT;
31 int IA, IB, JA, JB, M, N;
257 char conjg, * one, top, * zero;
258 int Afr, Bcol, Bcurcol, Bcurimb1, Bcurinb1, Bcurrow, Bfr, Bii,
259 Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq,
260 Bnq0, Brow, WAfr, WBfr, WBsum, ctxt, k, kb, kbb, ktmp, lside,
261 mycol, myrow, notran, npcol, nprow, size, unit, upper;
267 char * Aptr = NULL, * Bptr = NULL, * Bptr0 = NULL, * WA = NULL,
280 gsum2d =
TYPE->Cgsum2d; gemm =
TYPE->Fgemm;
289 Bimb = DESCB[
IMB_]; Binb = DESCB[
INB_];
290 Bmb = DESCB[
MB_ ]; Bnb = DESCB[
NB_ ]; Bld = DESCB[
LLD_];
291 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj, &Brow,
294 Bmp0 =
PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow );
296 Bnq0 =
PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol );
297 if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 =
Mptr( B, Bii, Bjj, Bld, size );
305 for( k = 0; k < M; k += kb )
307 kbb = M - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
312 DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
317 PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
319 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0,
320 DBUFA,
COLUMN, &WA, WAd, &WAfr );
334 ROW, &Bptr, DBUFB, &Bfr );
339 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0,
340 DBUFB,
ROW, &WB, WBd, &WBfr );
344 PB_Cplapad(
TYPE,
ALL,
NOCONJG, kbb, N, zero, zero, B, IB+k, JB,
349 Bmp =
PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow );
350 if( ( Bmp > 0 ) && ( Bnq0 > 0 ) )
352 &kbb, ALPHA, WA, &WAd[
LLD_], WB, &WBd[
LLD_], one, Bptr0,
355 if( WBfr ) free( WB );
356 if( WAfr ) free( WA );
357 if( Bfr ) free( Bptr );
358 if( Afr ) free( Aptr );
363 for( k = ( ( M - 1 ) / kb ) * kb; k >= 0; k -= kb )
365 ktmp = M - k; kbb =
MIN( ktmp, kb );
370 DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
376 Bcurrow =
PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow );
377 PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow,
379 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0,
380 DBUFA,
COLUMN, &WA, WAd, &WAfr );
394 DESCB,
ROW, &Bptr, DBUFB, &Bfr );
399 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0,
400 DBUFB,
ROW, &WB, WBd, &WBfr );
404 PB_Cplapad(
TYPE,
ALL,
NOCONJG, kbb, N, zero, zero, B, IB+k, JB,
409 Bmp =
PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow );
410 if( ( Bmp > 0 ) && ( Bnq0 > 0 ) )
412 &kbb, ALPHA, WA, &WAd[
LLD_], WB, &WBd[
LLD_], one,
413 Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld );
415 if( WBfr ) free( WB );
416 if( WAfr ) free( WA );
417 if( Bfr ) free( Bptr );
418 if( Afr ) free( Aptr );
426 for( k = ( ( N - 1 ) / kb ) * kb; k >= 0; k -= kb )
428 ktmp = N - k; kbb =
MIN( ktmp, kb );
433 DESCA,
ROW, &Aptr, DBUFA, &Afr );
439 Bcurcol =
PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol );
440 PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow,
441 Bcurcol, ctxt, Bld );
442 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0,
443 DBUFA,
ROW, &WA, WAd, &WAfr );
457 DESCB,
COLUMN, &Bptr, DBUFB, &Bfr );
462 PB_CInV(
TYPE,
NOCONJG,
COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0,
463 DBUFB,
COLUMN, &WB, WBd, &WBfr );
467 PB_Cplapad(
TYPE,
ALL,
NOCONJG, M, kbb, zero, zero, B, IB, JB+k,
472 Bnq =
PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol );
473 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
475 &kbb, ALPHA, WB, &WBd[
LLD_], WA, &WAd[
LLD_], one,
476 Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld );
478 if( WBfr ) free( WB );
479 if( WAfr ) free( WA );
480 if( Bfr ) free( Bptr );
481 if( Afr ) free( Aptr );
486 for( k = 0; k < N; k += kb )
488 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
493 DESCA,
ROW, &Aptr, DBUFA, &Afr );
498 PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
500 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0,
501 DBUFA,
ROW, &WA, WAd, &WAfr );
515 COLUMN, &Bptr, DBUFB, &Bfr );
520 PB_CInV(
TYPE,
NOCONJG,
COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0,
521 DBUFB,
COLUMN, &WB, WBd, &WBfr );
525 PB_Cplapad(
TYPE,
ALL,
NOCONJG, M, kbb, zero, zero, B, IB, JB+k,
530 Bnq =
PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol );
531 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
533 &kbb, ALPHA, WB, &WBd[
LLD_], WA, &WAd[
LLD_], one, Bptr0,
536 if( WBfr ) free( WB );
537 if( WAfr ) free( WA );
538 if( Bfr ) free( Bptr );
539 if( Afr ) free( Aptr );
557 for( k = 0; k < M; k += kb )
559 kbb = M - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
564 DESCA,
ROW, &Aptr, DBUFA, &Afr );
569 PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
571 PB_CInV(
TYPE, &conjg,
COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0,
572 DBUFA,
ROW, &WA, WAd, &WAfr );
586 DESCB,
ROW, &Bptr, DBUFB, &Bfr );
591 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0,
592 DBUFB,
ROW, &WB, WBd, &WBfr );
601 Bmp =
PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow );
602 if( ( Bmp > 0 ) && ( Bnq0 > 0 ) )
604 &kbb, ALPHA, WA, &WAd[
LLD_], WB, &WBd[
LLD_], one,
607 if( WBfr ) free( WB );
608 if( WAfr ) free( WA );
609 if( Bfr ) free( Bptr );
610 if( Afr ) free( Aptr );
615 for( k = ( ( M - 1 ) / kb ) * kb; k >= 0; k -= kb )
617 ktmp = M - k; kbb =
MIN( ktmp, kb );
622 JA+k, DESCA,
ROW, &Aptr, DBUFA, &Afr );
628 Bcurrow =
PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow );
629 PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow,
631 PB_CInV(
TYPE, &conjg,
COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0,
632 DBUFA,
ROW, &WA, WAd, &WAfr );
646 DESCB,
ROW, &Bptr, DBUFB, &Bfr );
651 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, N, Bd0, kbb, Bptr, 0, 0,
652 DBUFB,
ROW, &WB, WBd, &WBfr );
661 Bmp =
PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow );
662 if( ( Bmp > 0 ) && ( Bnq0 > 0 ) )
664 &kbb, ALPHA, WA, &WAd[
LLD_], WB, &WBd[
LLD_], one,
665 Mptr( Bptr0, Bmp0-Bmp, 0, Bld, size ), &Bld );
667 if( WBfr ) free( WB );
668 if( WAfr ) free( WA );
669 if( Bfr ) free( Bptr );
670 if( Afr ) free( Aptr );
678 for( k = ( ( N - 1 ) / kb ) * kb; k >= 0; k -= kb )
680 ktmp = N - k; kbb =
MIN( ktmp, kb );
685 JA+k, DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
691 Bcurcol =
PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol );
692 PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow,
693 Bcurcol, ctxt, Bld );
694 PB_CInV(
TYPE, &conjg,
ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0,
695 DBUFA,
COLUMN, &WA, WAd, &WAfr );
709 DESCB,
COLUMN, &Bptr, DBUFB, &Bfr );
714 PB_CInV(
TYPE,
NOCONJG,
COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0,
715 DBUFB,
COLUMN, &WB, WBd, &WBfr );
724 Bnq =
PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol );
725 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
727 &kbb, ALPHA, WB, &WBd[
LLD_], WA, &WAd[
LLD_], one,
728 Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld );
730 if( WBfr ) free( WB );
731 if( WAfr ) free( WA );
732 if( Bfr ) free( Bptr );
733 if( Afr ) free( Aptr );
738 for( k = 0; k < N; k += kb )
740 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
745 DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
750 PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
752 PB_CInV(
TYPE, &conjg,
ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0,
753 DBUFA,
COLUMN, &WA, WAd, &WAfr );
767 DESCB,
COLUMN, &Bptr, DBUFB, &Bfr );
772 PB_CInV(
TYPE,
NOCONJG,
COLUMN, M, ktmp, Bd0, kbb, Bptr, 0, 0,
773 DBUFB,
COLUMN, &WB, WBd, &WBfr );
782 Bnq =
PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol );
783 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
785 &kbb, ALPHA, WB, &WBd[
LLD_], WA, &WAd[
LLD_], one,
788 if( WBfr ) free( WB );
789 if( WAfr ) free( WA );
790 if( Bfr ) free( Bptr );
791 if( Afr ) free( Aptr );
807 for( k = ( ( M - 1 ) / kb ) * kb; k >= 0; k -= kb )
809 kbb = M - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
814 DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
819 PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
821 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0,
822 DBUFA,
COLUMN, &WA, WAd, &WAfr );
835 PB_COutV(
TYPE,
ROW,
INIT, ktmp, N, Bd0, kbb, &WB, WBd, &WBfr,
837 Bmp =
PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow );
838 if( ( Bnq0 > 0 ) && ( Bmp > 0 ) )
840 &Bmp, ALPHA, WA, &WAd[
LLD_], Bptr0, &Bld, zero, WB,
847 gsum2d( ctxt,
COLUMN, &top, kbb, Bnq0, WB, WBd[
LLD_],
850 if( WAfr ) free( WA );
851 if( Afr ) free( Aptr );
855 PB_CScatterV(
TYPE,
FORWARD, kbb, N, WB, 0, 0, WBd,
ROW, zero,
856 B, IB+k, JB, DESCB,
ROW );
857 if( WBfr ) free( WB );
862 for( k = 0; k < M; k += kb )
864 ktmp = M - k; kbb =
MIN( ktmp, kb );
869 JA+k, DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
875 Bcurrow =
PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow );
876 PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow,
878 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Bd0, kbb, Aptr, 0, 0,
879 DBUFA,
COLUMN, &WA, WAd, &WAfr );
892 PB_COutV(
TYPE,
ROW,
INIT, ktmp, N, Bd0, kbb, &WB, WBd, &WBfr,
894 Bmp =
PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow );
895 if( ( Bnq0 > 0 ) && ( Bmp > 0 ) )
897 &Bmp, ALPHA, WA, &WAd[
LLD_],
Mptr( Bptr0, Bmp0-Bmp,
898 0, Bld, size ), &Bld, zero, WB, &WBd[
LLD_] );
904 gsum2d( ctxt,
COLUMN, &top, kbb, Bnq0, WB, WBd[
LLD_],
908 if( WAfr ) free( WA );
909 if( Afr ) free( Aptr );
914 zero, B, IB+k, JB, DESCB,
ROW );
915 if( WBfr ) free( WB );
925 for( k = 0; k < N; k += kb )
927 ktmp = N - k; kbb =
MIN( ktmp, kb );
932 JA+k, DESCA,
ROW, &Aptr, DBUFA, &Afr );
938 Bcurcol =
PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol );
939 PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow,
940 Bcurcol, ctxt, Bld );
941 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0,
942 DBUFA,
ROW, &WA, WAd, &WAfr );
957 Bnq =
PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol );
958 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
960 &Bnq, ALPHA,
Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ),
961 &Bld, WA, &WAd[
LLD_], zero, WB, &WBd[
LLD_] );
967 gsum2d( ctxt,
ROW, &top, Bmp0, kbb, WB, WBd[
LLD_],
970 if( WAfr ) free( WA );
971 if( Afr ) free( Aptr );
976 zero, B, IB, JB+k, DESCB,
COLUMN );
977 if( WBfr ) free( WB );
982 for( k = ( ( N - 1 ) / kb ) * kb; k >= 0; k -= kb )
984 kbb = N - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
989 DESCA,
ROW, &Aptr, DBUFA, &Afr );
994 PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
996 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Bd0, kbb, Aptr, 0, 0,
997 DBUFA,
ROW, &WA, WAd, &WAfr );
1012 Bnq =
PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol );
1013 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
1015 &Bnq, ALPHA, Bptr0, &Bld, WA, &WAd[
LLD_], zero, WB,
1022 gsum2d( ctxt,
ROW, &top, Bmp0, kbb, WB, WBd[
LLD_],
1023 myrow, WBd[
CSRC_] );
1025 if( WAfr ) free( WA );
1026 if( Afr ) free( Aptr );
1031 zero, B, IB, JB+k, DESCB,
COLUMN );
1032 if( WBfr ) free( WB );