14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
22 char * A,
int IA,
int JA,
int * DESCA,
char * AROC,
24 char * B,
int IB,
int JB,
int * DESCB,
char * BROC )
26 void PB_CpaxpbyDN(
TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC,
27 BETA, B, IB, JB, DESCB, BROC )
31 char * AROC, * BROC, * CONJUG;
32 int IA, IB, JA, JB, M, N;
218 char scope, * top, * zero;
219 int Acol, Aii, Ainb1D, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR,
220 AnD, AnbD, AnpD, AnprocsD, AprocD, AprocR, Aroc, Arow, Bcol,
221 Bii, BisR, BisRow, Bjj, Bld, Bm, BmyprocD, BmyprocR, Bn,
222 BnprocsD, BprocR, Broc, Brow, RRorCC, ctxt, izero=0, k, kbb,
223 kk, kn, ktmp, mycol, mydist, myproc, myrow, npcol, nprow, p,
242 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
244 if( ( AisRow = (
Mupcase( AROC[0] ) ==
CROW ) ) != 0 )
246 AnD = N; AnbD = DESCA[
NB_]; Ald = DESCA[
LLD_];
247 AprocD = Acol; AprocR = Arow;
248 AmyprocD = mycol; AmyprocR = myrow; AnprocsD = npcol;
249 AisR = ( ( Arow == -1 ) || ( nprow == 1 ) );
254 AnD = M; AnbD = DESCA[
MB_]; Ald = DESCA[
LLD_];
255 AprocD = Arow; AprocR = Acol;
256 AmyprocD = myrow; AmyprocR = mycol; AnprocsD = nprow;
257 AisR = ( ( Acol == -1 ) || ( npcol == 1 ) );
263 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
265 if( ( BisRow = (
Mupcase( BROC[0] ) ==
CROW ) ) != 0 )
268 BmyprocD = mycol; BnprocsD = npcol;
269 BprocR = Brow; BmyprocR = myrow;
270 BisR = ( ( BprocR == -1 ) || ( nprow == 1 ) );
275 BmyprocD = myrow; BnprocsD = nprow;
276 BprocR = Bcol; BmyprocR = mycol;
277 BisR = ( ( BprocR == -1 ) || ( npcol == 1 ) );
282 RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) );
297 if( AisR ) { AprocR = ( ( RRorCC ) ? BprocR : 0 ); }
302 if( ( AmyprocR == AprocR ) || ( BmyprocR == BprocR ) )
311 else add =
TYPE->Fmmadd;
314 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD,
319 if( AprocR == BprocR )
329 if( BisRow ) { kk = Ajj; ktmp = JB + N; kn = JB + Ainb1D; }
330 else { kk = Aii; ktmp = IB + M; kn = IB + Ainb1D; }
332 if( AmyprocD == Aroc )
335 add( &M, &Ainb1D, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ),
336 &Ald, BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
338 add( &Ainb1D, &N, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ),
339 &Ald, BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
346 &izero, zero, zero,
Mptr( B, Bii, Bjj, Bld, size ),
350 &izero, zero, zero,
Mptr( B, Bii, Bjj, Bld, size ),
355 for( k = kn; k < ktmp; k += AnbD )
357 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
359 if( AmyprocD == Aroc )
362 add( &M, &kbb, ALPHA,
Mptr( A, Aii, kk, Ald, size ),
363 &Ald, BETA,
Mptr( B, Bii, k, Bld, size ),
366 add( &kbb, &N, ALPHA,
Mptr( A, kk, Ajj, Ald, size ),
367 &Ald, BETA,
Mptr( B, k, Bjj, Bld, size ),
375 &izero, zero, zero,
Mptr( B, Bii, k, Bld,
379 &izero, zero, zero,
Mptr( B, k, Bjj, Bld,
392 zero, zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
399 TYPE->Cgsum2d( ctxt, &scope, top, M, N,
Mptr( B, Bii, Bjj, Bld,
400 size ), Bld, -1, 0 );
407 if( AmyprocR == AprocR )
416 TYPE->Cgesd2d( ctxt, M, AnpD,
Mptr( A, Aii, Ajj, Ald,
417 size ), Ald, BprocR, BmyprocD );
419 TYPE->Cgesd2d( ctxt, AnpD, N,
Mptr( A, Aii, Ajj, Ald,
420 size ), Ald, BmyprocD, BprocR );
424 if( BmyprocR == BprocR )
437 TYPE->Cgerv2d( ctxt, M, AnpD, buf, M, AprocR,
445 TYPE->Cgerv2d( ctxt, AnpD, N, buf, AnpD, AmyprocD,
451 if( AmyprocD == Aroc )
454 add( &M, &Ainb1D, ALPHA, buf, &M, BETA,
Mptr( B,
455 Bii, Bjj, Bld, size ), &Bld );
457 add( &Ainb1D, &N, ALPHA, buf, &AnpD, BETA,
Mptr( B,
458 Bii, Bjj, Bld, size ), &Bld );
465 &Ainb1D, &izero, zero, zero,
Mptr( B, Bii, Bjj,
469 &N, &izero, zero, zero,
Mptr( B, Bii, Bjj, Bld,
474 for( k = kn; k < ktmp; k += AnbD )
476 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
478 if( AmyprocD == Aroc )
481 add( &M, &kbb, ALPHA,
Mptr( buf, 0, kk, M, size ),
482 &M, BETA,
Mptr( B, Bii, k, Bld, size ),
485 add( &kbb, &N, ALPHA,
Mptr( buf, kk, 0, AnpD,
486 size ), &AnpD, BETA,
Mptr( B, k, Bjj, Bld,
494 &kbb, &izero, zero, zero,
Mptr( B, Bii, k,
498 &N, &izero, zero, zero,
Mptr( B, k, Bjj, Bld,
503 if( buf ) free( buf );
512 zero, zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
519 TYPE->Cgsum2d( ctxt, &scope, top, M, N,
Mptr( B, Bii, Bjj,
520 Bld, size ), Bld, -1, 0 );
531 else add =
TYPE->Fmmtadd;
535 if( BisRow ) { ktmp = JB + M; kn = JB + Ainb1D; }
536 else { ktmp = IB + N; kn = IB + Ainb1D; }
541 for( p = 0; p < AnprocsD; p++ )
543 mydist =
MModSub( p, AprocD, AnprocsD );
544 myproc =
MModAdd( AprocD, mydist, AnprocsD );
546 if( ( BprocR == p ) && ( AprocR == Broc ) )
553 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD,
558 kk = ( BisRow ? Aii : Ajj );
562 if( BmyprocD == Broc )
565 add( &M, &Ainb1D, ALPHA,
Mptr( A, Aii, Ajj,
566 Ald, size ), &Ald, BETA,
Mptr( B, Bii,
567 Bjj, Bld, size ), &Bld );
569 add( &Ainb1D, &N, ALPHA,
Mptr( A, Aii, Ajj,
570 Ald, size ), &Ald, BETA,
Mptr( B, Bii,
571 Bjj, Bld, size ), &Bld );
578 &Ainb1D, &izero, zero, zero,
Mptr( B, Bii,
579 Bjj, Bld, size ), &Bld );
582 &Ainb1D, &M, &izero, zero, zero,
Mptr( B,
583 Bii, Bjj, Bld, size ), &Bld );
588 for( k = kn; k < ktmp; k += AnbD )
590 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
593 if( BmyprocD == Broc )
596 add( &M, &kbb, ALPHA,
Mptr( A, Aii, kk, Ald,
597 size ), &Ald, BETA,
Mptr( B, k, Bjj,
600 add( &kbb, &N, ALPHA,
Mptr( A, kk, Ajj, Ald,
601 size ), &Ald, BETA,
Mptr( B, Bii, k,
609 &N, &kbb, &izero, zero, zero,
Mptr( B,
610 Bii, k, Bld, size ), &Bld );
613 &kbb, &M, &izero, zero, zero,
Mptr( B,
614 k, Bjj, Bld, size ), &Bld );
627 if( ( AmyprocR == AprocR ) && ( AmyprocD == p ) )
629 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD,
634 TYPE->Cgesd2d( ctxt, M, AnpD,
Mptr( A, Aii, Ajj, Ald,
635 size ), Ald, Broc, BprocR );
637 TYPE->Cgesd2d( ctxt, AnpD, N,
Mptr( A, Aii, Ajj, Ald,
638 size ), Ald, BprocR, Broc );
642 if( BmyprocR == BprocR )
644 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, p, AprocD,
651 if( BmyprocD == Broc )
656 TYPE->Cgerv2d( ctxt, M, AnpD, buf, M, AprocR, p );
661 TYPE->Cgerv2d( ctxt, AnpD, N, buf, AnpD, p,
668 if( BmyprocD == Broc )
671 add( &M, &Ainb1D, ALPHA, buf, &M, BETA,
672 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
674 add( &Ainb1D, &N, ALPHA, buf, &AnpD, BETA,
675 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
682 &Ainb1D, &izero, zero, zero,
Mptr( B, Bii,
683 Bjj, Bld, size ), &Bld );
686 &Ainb1D, &M, &izero, zero, zero,
Mptr( B,
687 Bii, Bjj, Bld, size ), &Bld );
692 for( k = kn; k < ktmp; k += AnbD )
694 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
697 if( BmyprocD == Broc )
700 add( &M, &kbb, ALPHA,
Mptr( buf, 0, kk, M,
701 size ), &M, BETA,
Mptr( B, k, Bjj,
704 add( &kbb, &N, ALPHA,
Mptr( buf, kk, 0,
705 AnpD, size ), &AnpD, BETA,
Mptr( B,
706 Bii, k, Bld, size ), &Bld );
713 &N, &kbb, &izero, zero, zero,
Mptr( B,
714 Bii, k, Bld, size ), &Bld );
717 &kbb, &M, &izero, zero, zero,
Mptr( B,
718 k, Bjj, Bld, size ), &Bld );
723 if( ( BmyprocD == Broc ) && ( buf ) ) free( buf );
730 if( BmyprocR == BprocR )
737 TYPE->Cgsum2d( ctxt, &scope, top, N, M,
Mptr( B, Bii, Bjj, Bld,
738 size ), Bld, -1, 0 );
750 if( AisRow ) { Bm = M; Bn = N; }
751 else { Bm = N; Bn = M; }
753 if( BmyprocR == BprocR )
758 size ), Bld, BprocR, BmyprocD );
762 if( AisRow ) { Bm = N; Bn = M; }
763 else { Bm = M; Bn = N; }
765 if( BmyprocR == BprocR )
766 TYPE->Cgebs2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
769 TYPE->Cgebr2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
770 size ), Bld, BmyprocD, BprocR );
780 if( AisR || ( AmyprocR == AprocR ) )
786 else add =
TYPE->Fmmadd;
791 else add =
TYPE->Fmmtadd;
795 AnpD =
PB_Cnumroc( AnD, 0, Ainb1D, AnbD, AmyprocD, AprocD, AnprocsD );
799 kk = ( AisRow ? Ajj : Aii );
801 if( BisRow ) { ktmp = JB + ( RRorCC ? N : M ); kn = JB + Ainb1D; }
802 else { ktmp = IB + ( RRorCC ? M : N ); kn = IB + Ainb1D; }
804 if( AmyprocD == Aroc )
807 add( &M, &Ainb1D, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ), &Ald,
808 BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
810 add( &Ainb1D, &N, ALPHA,
Mptr( A, Aii, Ajj, Ald, size ), &Ald,
811 BETA,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
818 if( AisRow ) { Bm = M; Bn = Ainb1D; }
819 else { Bm = Ainb1D; Bn = N; }
823 if( AisRow ) { Bm = Ainb1D; Bn = M; }
824 else { Bm = N; Bn = Ainb1D; }
827 zero, zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
831 for( k = kn; k < ktmp; k += AnbD )
833 kbb = ktmp - k; kbb =
MIN( kbb, AnbD );
835 if( BisRow ) { buf =
Mptr( B, Bii, k, Bld, size ); }
836 else { buf =
Mptr( B, k, Bjj, Bld, size ); }
838 if( AmyprocD == Aroc )
841 add( &M, &kbb, ALPHA,
Mptr( A, Aii, kk, Ald, size ), &Ald,
844 add( &kbb, &N, ALPHA,
Mptr( A, kk, Ajj, Ald, size ), &Ald,
852 if( AisRow ) { Bm = M; Bn = kbb; }
853 else { Bm = kbb; Bn = N; }
857 if( AisRow ) { Bm = kbb; Bn = M; }
858 else { Bm = N; Bn = kbb; }
861 zero, zero, buf, &Bld );
870 zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
873 zero,
Mptr( B, Bii, Bjj, Bld, size ), &Bld );
882 TYPE->Cgsum2d( ctxt, &scope, top, M, N,
Mptr( B, Bii, Bjj, Bld,
883 size ), Bld, -1, 0 );
885 TYPE->Cgsum2d( ctxt, &scope, top, N, M,
Mptr( B, Bii, Bjj, Bld,
886 size ), Bld, -1, 0 );
896 if( RRorCC ) { Bm = M; Bn = N; }
897 else { Bm = N; Bn = M; }
902 if( AmyprocR == AprocR )
907 size ), Bld, AprocR, AmyprocD );
912 if( AmyprocR == AprocR )
913 TYPE->Cgebs2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
916 TYPE->Cgebr2d( ctxt,
ROW, top, Bm, Bn,
Mptr( B, Bii, Bjj, Bld,
917 size ), Bld, AmyprocD, AprocR );