14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANS,
char * DIAG,
int M,
int N,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
char * BC,
23 int LDBC,
char * BR,
int LDBR )
26 A, IA, JA, DESCA, BC, LDBC, BR, LDBR )
30 char * ALPHA, * DIAG, * SIDE, * TRANS, * UPLO;
31 int FBCAST, IA, JA, LDBC, LDBR, M, N;
254 char btop, * negone, * one, * talpha1, * talpha2, * zero;
255 int Acol, Aii, Aimb1, Ainb1, Ais1Col, Ais1Row, AisColRep,
256 AisRowRep, Ajj, Alcol, Ald, Alrow, Amb, Anpprev, Anb, Anp,
257 Anq, Arow, Asrc, ChangeRoc=0, LNorRT, Na, Nb, bcst, ctxt,
258 izero=0, k=0, kb, kbprev=0, kbsize, lside, mb1, mycol, myrow,
259 n1, n1last, n1p, n1pprev=0, nb1, nlast, notran, npcol, nprow,
260 rocprev, size, tmp1, tmp2;
272 char * Aprev = NULL, * Bd = NULL, * Bdprev = NULL,
273 * Bprev = NULL, * work = NULL;
278 if( ( M <= 0 ) || ( N <= 0 ) )
return;
286 LNorRT = ( lside && notran ) || ( !( lside ) && !( notran ) );
287 if( LNorRT ) { Na = M; Nb = N; }
else { Na = N; Nb = M; }
291 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj, &Arow,
297 Amb = DESCA[
MB_]; Anb = DESCA[
NB_]; Ald = DESCA[
LLD_ ];
299 Anp =
PB_Cnumroc( Na, 0, Aimb1, Amb, myrow, Arow, nprow );
300 Ais1Row = !(
PB_Cspan( Na, 0, Aimb1, Amb, Arow, nprow ) );
302 Anq =
PB_Cnumroc( Na, 0, Ainb1, Anb, mycol, Acol, npcol );
303 Ais1Col = !(
PB_Cspan( Na, 0, Ainb1, Anb, Acol, npcol ) );
307 if( Ais1Row && Ais1Col )
318 Ald,
TYPE->size ), &Ald, BC, &LDBC );
319 TYPE->Fmmtadd( &M, &N,
TYPE->one, BC, &LDBC,
TYPE->zero, BR,
322 if( ( Arow >= 0 ) && FBCAST )
326 TYPE->Cgebs2d( ctxt,
COLUMN, &btop, N, M, BR, LDBR );
328 TYPE->Cgebr2d( ctxt,
COLUMN, &btop, N, M, BR, LDBR, Arow,
342 Ald,
TYPE->size ), &Ald, BR, &LDBR );
343 TYPE->Fmmtadd( &M, &N,
TYPE->one, BR, &LDBR,
TYPE->zero, BC,
346 if( ( Acol >= 0 ) && FBCAST )
350 TYPE->Cgebs2d( ctxt,
ROW, &btop, N, M, BC, LDBC );
352 TYPE->Cgebr2d( ctxt,
ROW, &btop, N, M, BC, LDBC, myrow,
363 negone =
TYPE->negone; one =
TYPE->one; zero =
TYPE->zero;
364 add =
TYPE->Fmmadd; tadd =
TYPE->Fmmtadd; pad =
TYPE->Ftzpad;
365 gemm =
TYPE->Fgemm; trsm =
TYPE->Ftrsm;
366 send =
TYPE->Cgesd2d; recv =
TYPE->Cgerv2d;
367 bsend =
TYPE->Cgebs2d; brecv =
TYPE->Cgebr2d;
369 if( ( Anp > 0 ) && ( Anq > 0 ) ) A =
Mptr( A, Aii, Ajj, Ald, size );
376 if( ( Anq <= 0 ) || ( Ais1Row && ( ( Arow >= 0 ) && !( FBCAST ) &&
377 ( myrow != Arow ) ) ) )
return;
379 bcst = ( ( !Ais1Row ) || ( Ais1Row && ( Arow >= 0 ) && FBCAST ) );
380 AisRowRep = ( ( Arow < 0 ) || ( nprow == 1 ) );
387 nlast = ( npcol - 1 ) * Anb;
388 n1 =
MAX( nlast, Anb );
390 n1last = n1 - Anb +
MAX( Ainb1, Anb );
393 Alrow =
PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
394 Alcol =
PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
395 rocprev = Alcol; Anpprev = Anp; Bprev = BC; Bdprev = BR;
396 Aprev = A =
Mptr( A, 0, Anq, Ald, size );
399 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) );
400 n1 = ( ( Ais1Col || ( Na - nb1 < nlast ) ) ? n1last : n1 );
401 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
403 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
405 talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Alcol ) ) ?
411 if( Ais1Col || ( mycol == Alcol ) )
412 { A -= Ald*kbsize; Anq -= kb; Bd =
Mptr( BR, 0, Anq, LDBR, size ); }
413 if( ( Arow < 0 ) || ( myrow == Alrow ) ) { Anp -= kb; }
419 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
421 tmp1 = ( Anpprev - n1pprev ) * size;
423 &kbprev, negone, Aprev+tmp1, &Ald, Bdprev, &LDBR,
424 talpha1, Bprev+tmp1, &LDBC );
429 if( !( Ais1Col ) && ChangeRoc )
431 if( mycol == rocprev )
433 send( ctxt, n1pprev, Nb,
Mptr( Bprev, Anpprev-n1pprev, 0,
434 LDBC, size ), LDBC, myrow, Alcol );
436 else if( mycol == Alcol )
438 recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev );
439 add( &n1pprev, &Nb, one, work, &n1pprev, one,
Mptr( Bprev,
440 Anpprev-n1pprev, 0, LDBC, size ), &LDBC );
447 if( Ais1Col || ( mycol == Alcol ) )
449 if( AisRowRep || ( myrow == Alrow ) )
453 Ald, size ), &Ald,
Mptr( BC, Anp, 0, LDBC, size ),
455 tadd( &kb, &Nb, one,
Mptr( BC, Anp, 0, LDBC, size ), &LDBC,
456 zero,
Mptr( BR, 0, Anq, LDBR, size ), &LDBR );
461 bsend( ctxt,
COLUMN, &btop, Nb, kb,
Mptr( BR, 0, Anq, LDBR,
464 brecv( ctxt,
COLUMN, &btop, Nb, kb,
Mptr( BR, 0, Anq, LDBR,
465 size ), LDBR, Alrow, mycol );
471 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Alrow ) ) )
473 zero, zero,
Mptr( BC, Anp, 0, LDBC, size ), &LDBC );
478 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
480 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
482 &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1,
489 if( Ais1Col || ( mycol == Alcol ) ) { Bdprev = Bd; Aprev = A; }
490 if( AisRowRep || ( myrow == Alrow ) ) { Anpprev -= kb; }
501 if( !( Ais1Row ) && ( Alrow >= 0 ) )
503 mb1 = ( Na > Aimb1 ? Amb : Aimb1 );
507 ChangeRoc = ( nb1 == 0 );
511 if( !( Ais1Col ) && ( Alcol >= 0 ) )
513 nb1 = ( Na > Ainb1 ? Anb : Ainb1 );
515 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) );
516 n1 = ( ( Ais1Col || ( Na-nb1 < nlast ) ) ? n1last : n1 );
517 tmp2 = n1 + nb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
518 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Aimb1, Amb, myrow, Asrc,
527 n1 = (
MAX( npcol, 2 ) - 1 ) * Anb;
529 Aprev = A; Bprev = BC, Bdprev = BR; Anpprev = Anp;
530 mb1 = Aimb1; nb1 = Ainb1; rocprev = Acol;
531 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
533 n1p =
PB_Cnumroc(
MIN( tmp1, tmp2 ), kb, Aimb1, Amb, myrow, Asrc,
535 talpha1 = talpha2 = ( ( Ais1Col || ( mycol == Acol ) ) ?
545 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
547 &kbprev, negone, Aprev, &Ald, Bdprev, &LDBR, talpha1,
552 if( !( Ais1Col ) && ChangeRoc )
554 if( mycol == rocprev )
556 send( ctxt, n1pprev, Nb, Bprev, LDBC, myrow, Acol );
558 else if( mycol == Acol )
560 recv( ctxt, n1pprev, Nb, work, n1pprev, myrow, rocprev );
561 add( &n1pprev, &Nb, one, work, &n1pprev, one, Bprev,
569 if( Ais1Col || ( mycol == Acol ) )
571 if( AisRowRep || ( myrow == Arow ) )
574 C2F_CHAR( DIAG ), &kb, &Nb, talpha2, A, &Ald, BC,
576 tadd( &kb, &Nb, one, BC, &LDBC, zero, BR, &LDBR );
581 bsend( ctxt,
COLUMN, &btop, Nb, kb, BR, LDBR );
583 brecv( ctxt,
COLUMN, &btop, Nb, kb, BR, LDBR, Arow,
590 if( !( Ais1Col ) && ( AisRowRep || ( myrow == Arow ) ) )
592 zero, zero, BC, &LDBC );
597 if( ( Ais1Col || ( mycol == rocprev ) ) && ( kbprev > 0 ) )
599 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
601 tmp2 = n1pprev * size;
603 &kbprev, negone, Aprev+tmp2, &Ald, Bdprev, &LDBR,
604 talpha1, Bprev+tmp2, &LDBC );
606 Aprev += Ald * kbprev * size; talpha1 = one;
611 if( Ais1Col || ( mycol == Acol ) )
612 { A += Ald*kbsize; Bdprev = Bd = BR; BR += LDBR*kbsize; }
613 if( AisRowRep || ( myrow == Arow ) )
615 Bprev = ( BC += kbsize );
618 Anpprev = ( Anp -= kb );
629 if( !( Ais1Row ) && ( Arow >= 0 ) )
631 mb1 =
MIN( Amb, Na );
635 ChangeRoc = ( nb1 == 0 );
639 if( !( Ais1Col ) && ( Acol >= 0 ) )
641 nb1 =
MIN( Anb, Na );
643 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + nb1 - kb;
644 n1p =
PB_Cnumroc(
MIN( tmp2, tmp1 ), k + kb, Aimb1, Amb, myrow,
654 if( ( Anp <= 0 ) || ( Ais1Col && ( ( Acol >= 0 ) && !( FBCAST ) &&
655 ( mycol != Acol ) ) ) )
return;
657 bcst = ( ( !Ais1Col ) || ( Ais1Col && ( Acol >= 0 ) && FBCAST ) );
658 AisColRep = ( ( Acol < 0 ) || ( npcol == 1 ) );
665 n1 = (
MAX( nprow, 2 ) - 1 ) * Amb;
667 Aprev = A; Bprev = BR, Bdprev = BC; Anpprev = Anq;
668 mb1 = Aimb1; nb1 = Ainb1; rocprev = Arow;
669 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
671 n1p =
PB_Cnumroc(
MIN( tmp1, tmp2 ), kb, Ainb1, Anb, mycol, Asrc,
673 talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Arow ) ) ?
683 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
685 &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1,
690 if( !( Ais1Row ) && ChangeRoc )
692 if( myrow == rocprev )
694 send( ctxt, Nb, n1pprev, Bprev, LDBR, Arow, mycol );
696 else if( myrow == Arow )
698 recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol );
699 add( &Nb, &n1pprev, one, work, &Nb, one, Bprev, &LDBR );
706 if( Ais1Row || ( myrow == Arow ) )
708 if( AisColRep || ( mycol == Acol ) )
711 C2F_CHAR( DIAG ), &Nb, &kb, talpha2, A, &Ald, BR,
713 tadd( &Nb, &kb, one, BR, &LDBR, zero, BC, &LDBC );
718 bsend( ctxt,
ROW, &btop, kb, Nb, BC, LDBC );
720 brecv( ctxt,
ROW, &btop, kb, Nb, BC, LDBC, myrow, Acol );
726 if( !( Ais1Row ) && ( AisColRep || ( mycol == Acol ) ) )
728 zero, zero, BR, &LDBR );
733 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
735 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
737 tmp2 = n1pprev * size;
739 &kbprev, negone, Bdprev, &LDBC, Aprev+Ald*tmp2, &Ald,
740 talpha1, Bprev+LDBR*tmp2, &LDBR );
742 Aprev += kbprev * size; talpha1 = one;
747 if( Ais1Row || ( myrow == Arow ) )
748 { A += kbsize; Bdprev = Bd = BC; BC += kbsize; }
749 if( AisColRep || ( mycol == Acol ) )
751 Bprev = ( BR += LDBR * kbsize );
753 Anpprev = ( Anq -= kb );
754 Aprev += Ald * kbsize;
765 if( !( Ais1Col ) && ( Acol >= 0 ) )
767 nb1 =
MIN( Anb, Na );
771 ChangeRoc = ( mb1 == 0 );
775 if( !( Ais1Row ) && ( Arow >= 0 ) )
777 mb1 =
MIN( Amb, Na );
779 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) ); tmp2 = n1 + mb1 - kb;
780 n1p =
PB_Cnumroc(
MIN( tmp2, tmp1 ), k + kb, Ainb1, Anb, mycol,
789 nlast = ( nprow - 1 ) * Amb;
790 n1 =
MAX( nlast, Amb );
792 n1last = n1 - Amb +
MAX( Aimb1, Amb );
795 Alrow =
PB_Cindxg2p( tmp1, Aimb1, Amb, Arow, Arow, nprow );
796 Alcol =
PB_Cindxg2p( tmp1, Ainb1, Anb, Acol, Acol, npcol );
797 rocprev = Alrow; Anpprev = Anq; Bprev = BR; Bdprev = BC;
798 Aprev = A =
Mptr( A, Anp, 0, Ald, size );
801 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) );
802 n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 );
803 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
805 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
807 talpha1 = talpha2 = ( ( Ais1Row || ( myrow == Alrow ) ) ?
813 if( Ais1Row || ( myrow == Alrow ) )
814 { A -= kbsize; Anp -= kb; Bd =
Mptr( BC, Anp, 0, LDBC, size ); }
815 if( ( Acol < 0 ) || ( mycol == Alcol ) ) { Anq -= kb; }
821 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
823 tmp1 = ( Anpprev - n1pprev ) * size;
825 &Nb, &n1pprev, &kbprev, negone, Bdprev,
826 &LDBC, Aprev+Ald*tmp1, &Ald, talpha1,
827 Bprev+LDBR*tmp1, &LDBR );
832 if( !( Ais1Row ) && ChangeRoc )
834 if( myrow == rocprev )
836 send( ctxt, Nb, n1pprev,
Mptr( Bprev, 0, Anpprev-n1pprev,
837 LDBR, size ), LDBR, Alrow, mycol );
839 else if( myrow == Alrow )
841 recv( ctxt, Nb, n1pprev, work, Nb, rocprev, mycol );
842 add( &Nb, &n1pprev, one, work, &Nb, one,
Mptr( Bprev, 0,
843 Anpprev-n1pprev, LDBR, size ), &LDBR );
850 if( Ais1Row || ( myrow == Alrow ) )
852 if( AisColRep || ( mycol == Alcol ) )
856 Ald, size ), &Ald,
Mptr( BR, 0, Anq, LDBR, size ),
858 tadd( &Nb, &kb, one,
Mptr( BR, 0, Anq, LDBR, size ), &LDBR,
859 zero,
Mptr( BC, Anp, 0, LDBC, size ), &LDBC );
864 bsend( ctxt,
ROW, &btop, kb, Nb,
Mptr( BC, Anp, 0, LDBC,
867 brecv( ctxt,
ROW, &btop, kb, Nb,
Mptr( BC, Anp, 0, LDBC,
868 size ), LDBC, myrow, Alcol );
874 if( !( Ais1Row ) && ( AisColRep || ( mycol == Alcol ) ) )
876 zero, zero,
Mptr( BR, 0, Anq, LDBR, size ), &LDBR );
881 if( ( Ais1Row || ( myrow == rocprev ) ) && ( kbprev > 0 ) )
883 if( ( tmp1 = Anpprev - n1pprev ) > 0 )
885 &kbprev, negone, Bdprev, &LDBC, Aprev, &Ald, talpha1,
892 if( Ais1Row || ( myrow == Alrow ) ) { Bdprev = Bd; Aprev = A; }
893 if( AisColRep || ( mycol == Alcol ) ) { Anpprev -= kb; }
904 if( !( Ais1Col ) && ( Alcol >= 0 ) )
906 nb1 = ( Na > Ainb1 ? Anb : Ainb1 );
910 ChangeRoc = ( mb1 == 0 );
914 if( !( Ais1Row ) && ( Alrow >= 0 ) )
916 mb1 = ( Na > Aimb1 ? Amb : Aimb1 );
918 tmp1 = Na - ( kb =
MIN( mb1, nb1 ) );
919 n1 = ( ( Ais1Row || ( Na-mb1 < nlast ) ) ? n1last : n1 );
920 tmp2 = n1 + mb1 - kb; tmp1 -= ( tmp2 =
MIN( tmp1, tmp2 ) );
921 n1p =
PB_Cnumroc( tmp2,
MAX( 0, tmp1 ), Ainb1, Anb, mycol, Asrc,
926 if( work ) free( work );