14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * UPLO,
char * TRANSA,
char * DIAG,
int M,
int N,
22 char * ALPHA,
char * A,
int IA,
int JA,
int * DESCA,
23 char * B,
int IB,
int JB,
int * DESCB )
25 void PB_CptrsmB(
TYPE, DIRECB, SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A,
26 IA, JA, DESCA, B, IB, JB, DESCB )
30 char * DIAG, * DIRECB, * SIDE, * TRANSA, * UPLO;
31 int IA, IB, JA, JB, M, N;
257 char Broc, TranOp, conjg, * negone, * one, * talpha, * talph0, top,
259 int Acol, Aii, Aimb1, Ainb1, Ajj, Akp, Akq, Alcmb, Ald, Amb, An,
260 Anb, Anp, Anp0, Anq, Anq0, Arow, Asrc, Astart, BcurrocR, Bfwd,
261 BiiD, BiiR, Binb1D, Binb1R, BisR, Bld, BmyprocD, BmyprocR,
262 BnD, BnR, BnbD, BnbR, BnpR, BnprocsD, BnprocsR, BrocD, BrocR,
263 BsrcR, LNorRT, WBCfr, WBCld, WBCapbX, WBCsum, WBRfr, WBRld,
264 WBRapbX, WBRsum, ctxt, izero=0, k, kb, kbnext, kbprev, ktmp,
265 lside, mycol, myrow, n, nb, nbb, notran, npcol, nprow, p=0,
274 char * Aptr = NULL, * Bptr = NULL, * WBC = NULL, * WBR = NULL;
288 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
291 one =
TYPE->one; zero =
TYPE->zero; negone =
TYPE->negone;
292 pad =
TYPE->Ftzpad; gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
299 BnD = An = M; BnR = N; Broc =
CCOLUMN;
300 BmyprocD = myrow; BnprocsD = nprow;
301 BmyprocR = mycol; BnprocsR = npcol;
302 BnbD = DESCB[
MB_ ]; BnbR = DESCB[
NB_ ];
304 PB_Cinfog2l( IB, JB, DESCB, BnprocsD, BnprocsR, BmyprocD, BmyprocR,
305 &BiiD, &BiiR, &BrocD, &BrocR );
311 BnD = An = N; BnR = M; Broc =
CROW;
312 BmyprocD = mycol; BnprocsD = npcol;
313 BmyprocR = myrow; BnprocsR = nprow;
314 BnbR = DESCB[
MB_ ]; BnbD = DESCB[
NB_ ];
316 PB_Cinfog2l( IB, JB, DESCB, BnprocsR, BnprocsD, BmyprocR, BmyprocD,
317 &BiiR, &BiiD, &BrocR, &BrocD );
324 PB_Cdescribe( An, An, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
325 &Ald, &Aimb1, &Ainb1, &Amb, &Anb, &Arow, &Acol, Ad0 );
334 else { conjg =
CNOCONJG; talpha = ALPHA; }
341 Astart = ( upper ? An - 1 : 0 );
346 Astart = ( upper ? 0 : An - 1 );
352 Alcmb = 2 * nb *
PB_Clcm( ( Arow >= 0 ? nprow : 1 ),
353 ( Acol >= 0 ? npcol : 1 ) );
358 if( !( BisR = ( ( BsrcR < 0 ) || ( BnprocsR == 1 ) ) ) && !( Bfwd ) )
360 tmp =
PB_Cindxg2p( BnR - 1, Binb1R, BnbR, BrocR, BrocR, BnprocsR );
361 p =
MModSub( tmp, BrocR, BnprocsR );
375 BcurrocR = ( BisR ? -1 :
MModAdd( BrocR, p, BnprocsR ) );
376 BnpR =
PB_Cnumroc( BnR, 0, Binb1R, BnbR, BcurrocR, BrocR, BnprocsR );
383 if( BnpR ) nbb = BnpR / ( ( BnpR - 1 ) / nb + 1 );
387 nbb =
MIN( nbb, BnpR );
393 PB_Cdescset( DBUFB, BnD, nbb, Binb1D, nbb, BnbD, BnbR, BrocD,
394 BcurrocR, ctxt, Bld );
395 if( BisR || ( BmyprocR == BcurrocR ) )
396 Bptr =
Mptr( B, BiiD, BiiR, Bld, size );
400 PB_Cdescset( DBUFB, nbb, BnD, nbb, Binb1D, BnbR, BnbD, BcurrocR,
402 if( BisR || ( BmyprocR == BcurrocR ) )
403 Bptr =
Mptr( B, BiiR, BiiD, Bld, size );
415 0, 0, DBUFB, &Broc, &WBC, WBCd, &WBCfr, &WBCsum,
420 PB_COutV(
TYPE,
ROW,
INIT, An, An, Ad0, nbb, &WBR, WBRd, &WBRfr,
425 Aimb1 = Ad0[
IMB_ ]; Ainb1 = Ad0[
INB_ ];
426 Amb = Ad0[
MB_ ]; Anb = Ad0[
NB_ ];
429 Anp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
430 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
431 if( ( Anp > 0 ) && ( Anq > 0 ) )
432 Aptr =
Mptr( A, Aii, Ajj, Ald, size );
434 WBCld = WBCd[
LLD_]; WBRld = WBRd[
LLD_];
441 for( k = ( Astart / Alcmb ) * Alcmb; k >= 0; k -= Alcmb )
443 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
448 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
449 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
451 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
452 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
461 Anq0 =
PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
464 kbprev =
MIN( k, Alcmb );
465 ktmp =
PB_Cnumroc( kbprev, k-kbprev, Aimb1, Amb,
466 myrow, Arow, nprow );
473 &nbb, &Anq0, negone,
Mptr( Aptr, Akp, Akq,
474 Ald, size ), &Ald,
Mptr( WBR, 0, Akq, WBRld,
475 size ), &WBRld, talph0,
Mptr( WBC, Akp, 0,
476 WBCld, size ), &WBCld );
479 gsum2d( ctxt,
ROW, &top, ktmp, nbb,
Mptr( WBC, Akp,
480 0, WBCld, size ), WBCld, myrow, Asrc );
483 &nbb, &izero, zero, zero,
Mptr( WBC, Akp, 0,
484 WBCld, size ), &WBCld );
486 if( ( Akp > 0 ) && ( Anq0 > 0 ) )
488 &nbb, &Anq0, negone,
Mptr( Aptr, 0, Akq, Ald,
489 size ), &Ald,
Mptr( WBR, 0, Akq, WBRld, size ),
490 &WBRld, talph0, WBC, &WBCld );
496 &nbb, &Anq0, negone,
Mptr( Aptr, 0, Akq, Ald,
497 size ), &Ald,
Mptr( WBR, 0, Akq, WBRld, size ),
498 &WBRld, talph0, WBC, &WBCld );
509 for( k = 0; k < An; k += Alcmb )
511 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
516 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
517 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
519 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
520 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
527 Akp =
PB_Cnumroc( k+kb, 0, Aimb1, Amb, myrow, Arow, nprow );
528 if( ( Anp0 = Anp - Akp ) > 0 )
530 Anq0 =
PB_Cnumroc( kb, k, Ainb1, Anb, mycol, Acol, npcol );
534 kbnext =
MIN( kbnext, Alcmb );
535 ktmp =
PB_Cnumroc( kbnext, k+kb, Aimb1, Amb, myrow,
543 &nbb, &Anq0, negone,
Mptr( Aptr, Akp, Akq,
544 Ald, size ), &Ald,
Mptr( WBR, 0, Akq, WBRld,
545 size ), &WBRld, talph0,
Mptr( WBC, Akp, 0,
546 WBCld, size ), &WBCld );
549 gsum2d( ctxt,
ROW, &top, ktmp, nbb,
Mptr( WBC, Akp,
550 0, WBCld, size ), WBCld, myrow, Asrc );
553 &nbb, &izero, zero, zero,
Mptr( WBC, Akp, 0,
554 WBCld, size ), &WBCld );
556 if( ( Anp0 > 0 ) && ( Anq0 > 0 ) )
558 &nbb, &Anq0, negone,
Mptr( Aptr, Akp+ktmp, Akq,
559 Ald, size ), &Ald,
Mptr( WBR, 0, Akq, WBRld,
560 size ), &WBRld, talph0,
Mptr( WBC, Akp+ktmp, 0,
561 WBCld, size ), &WBCld );
567 &nbb, &Anq0, negone,
Mptr( Aptr, Akp, Akq, Ald,
568 size ), &Ald,
Mptr( WBR, 0, Akq, WBRld, size ),
569 &WBRld, talph0,
Mptr( WBC, Akp, 0, WBCld,
579 if( WBCsum && ( Anp > 0 ) )
580 gsum2d( ctxt,
ROW, &top, Anp, nbb, WBC, WBCld, myrow,
586 PB_Cpaxpby(
TYPE, &conjg, An, nbb, one, WBC, 0, 0, WBCd,
COLUMN,
587 zero, Bptr, 0, 0, DBUFB, &Broc );
596 0, 0, DBUFB, &Broc, &WBR, WBRd, &WBRfr, &WBRsum,
601 PB_COutV(
TYPE,
COLUMN,
INIT, An, An, Ad0, nbb, &WBC, WBCd, &WBCfr,
606 Aimb1 = Ad0[
IMB_ ]; Ainb1 = Ad0[
INB_ ];
607 Amb = Ad0[
MB_ ]; Anb = Ad0[
NB_ ];
610 Anp =
PB_Cnumroc( An, 0, Aimb1, Amb, myrow, Arow, nprow );
611 Anq =
PB_Cnumroc( An, 0, Ainb1, Anb, mycol, Acol, npcol );
612 if( ( Anp > 0 ) && ( Anq > 0 ) )
613 Aptr =
Mptr( A, Aii, Ajj, Ald, size );
615 WBCld = WBCd[
LLD_]; WBRld = WBRd[
LLD_];
622 for( k = 0; k < An; k += Alcmb )
624 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
629 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
630 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
632 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
633 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
640 Akq =
PB_Cnumroc( k+kb, 0, Ainb1, Anb, mycol, Acol, npcol );
641 if( ( Anq0 = Anq - Akq ) > 0 )
643 Anp0 =
PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow );
647 kbnext =
MIN( kbnext, Alcmb );
648 ktmp =
PB_Cnumroc( kbnext, k+kb, Ainb1, Anb, mycol,
656 &ktmp, &Anp0, negone,
Mptr( WBC, Akp, 0,
657 WBCld, size ), &WBCld,
Mptr( Aptr, Akp, Akq,
658 Ald, size ), &Ald, talph0,
Mptr( WBR, 0,
659 Akq, WBRld, size ), &WBRld );
662 gsum2d( ctxt,
COLUMN, &top, nbb, ktmp,
Mptr( WBR, 0,
663 Akq, WBRld, size ), WBRld, Asrc, mycol );
666 &ktmp, &izero, zero, zero,
Mptr( WBR, 0, Akq,
667 WBRld, size ), &WBRld );
669 if( ( Anp0 > 0 ) && ( Anq0 > 0 ) )
671 &Anq0, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
672 size ), &WBCld,
Mptr( Aptr, Akp, Akq+ktmp, Ald,
673 size ), &Ald, talph0,
Mptr( WBR, 0, Akq+ktmp,
674 WBRld, size ), &WBRld );
680 &Anq0, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
681 size ), &WBCld,
Mptr( Aptr, Akp, Akq, Ald,
682 size ), &Ald, talph0,
Mptr( WBR, 0, Akq, WBRld,
694 for( k = ( Astart / Alcmb ) * Alcmb; k >= 0; k -= Alcmb )
696 ktmp = An - k; kb =
MIN( ktmp, Alcmb );
701 Akp =
PB_Cnumroc( k, 0, Aimb1, Amb, myrow, Arow, nprow );
702 Akq =
PB_Cnumroc( k, 0, Ainb1, Anb, mycol, Acol, npcol );
704 talph0, Aptr, k, k, Ad0,
Mptr( WBC, Akp, 0, WBCld,
705 size ), WBCld,
Mptr( WBR, 0, Akq, WBRld, size ),
714 Anp0 =
PB_Cnumroc( kb, k, Aimb1, Amb, myrow, Arow, nprow );
717 kbprev =
MIN( k, Alcmb );
718 ktmp =
PB_Cnumroc( kbprev, k-kbprev, Ainb1, Anb,
719 mycol, Acol, npcol );
726 &ktmp, &Anp0, negone,
Mptr( WBC, Akp, 0,
727 WBCld, size ), &WBCld,
Mptr( Aptr, Akp, Akq,
728 Ald, size ), &Ald, talph0,
Mptr( WBR, 0,
729 Akq, WBRld, size ), &WBRld );
732 gsum2d( ctxt,
COLUMN, &top, nbb, ktmp,
Mptr( WBR, 0,
733 Akq, WBRld, size ), WBRld, Asrc, mycol );
736 &ktmp, &izero, zero, zero,
Mptr( WBR, 0, Akq,
737 WBRld, size ), &WBRld );
739 if( ( Anp0 > 0 ) && ( Akq > 0 ) )
741 &Akq, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
742 size ), &WBCld,
Mptr( Aptr, Akp, 0, Ald,
743 size ), &Ald, talph0, WBR, &WBRld );
749 &Akq, &Anp0, negone,
Mptr( WBC, Akp, 0, WBCld,
750 size ), &WBCld,
Mptr( Aptr, Akp, 0, Ald,
751 size ), &Ald, talph0, WBR, &WBRld );
760 if( WBRsum && ( Anq > 0 ) )
761 gsum2d( ctxt,
COLUMN, &top, nbb, Anq, WBR, WBRld, WBRd[
RSRC_],
767 PB_Cpaxpby(
TYPE, &conjg, nbb, An, one, WBR, 0, 0, WBRd,
ROW,
768 zero, Bptr, 0, 0, DBUFB, &Broc );
771 if( WBCfr ) free( WBC );
772 if( WBRfr ) free( WBR );
778 if( BisR || ( BmyprocR == BcurrocR ) ) BiiR += nbb;
787 if( TranOp ==
CCOTRAN ) free( talpha );