14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * A,
int IA,
int JA,
int * DESCA,
char * AROC,
22 char * ALPHA,
char * B,
int IB,
int JB,
int * DESCB,
26 ALPHA, B, IB, JB, DESCB, BROC )
30 char * ALPHA, * AROC, * BROC, * DIRECA;
31 int IA, IB, JA, JB, M, N;
195 int Afwd, Bbufld, Bcol, Bcurcol, Bcurrow, Bii, Bimb, Bimb1, Binb,
196 Binb1, BisRow, Bjj, Bld, Bm, Bmb, Bmp, Bn, Bnb, Bnnxt, BnnxtL,
197 Bnpre, Bnq, Brow, WAfr, ctxt, kb, mycol, mydist, mydistnb,
198 myrow, nlen, npcol, nprow, offset, size, srcdist, stride,
206 char * Bptr = NULL, * Bbuf = NULL, * Bbufptr = NULL, * WA = NULL;
214 if( ( M <= 0 ) || ( N <= 0 ) )
return;
222 if(
Mupcase( AROC[0] ) ==
Mupcase( BROC[0] ) ) { Bm = M; Bn = N; }
223 else { Bm = N; Bn = M; }
227 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
229 Bimb = DESCB[
IMB_]; Binb = DESCB[
INB_]; Bmb = DESCB[
MB_]; Bnb = DESCB[
NB_];
231 Bmp =
PB_Cnumroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
233 Bnq =
PB_Cnumroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
235 if( ( Bmp > 0 ) && ( Bnq > 0 ) ) Bptr =
Mptr( B, Bii, Bjj, Bld, size );
242 if( Afwd ) { Bcurrow = Brow; }
243 else { Bcurrow =
PB_Cindxg2p( Bm-1, Bimb1, Bmb, Brow, Brow, nprow ); }
244 PB_Cdescset( Bd0, Bm, Bn, Bm, Binb1, Bmb, Bnb, Bcurrow, Bcol, ctxt, Bld );
248 PB_CInV(
TYPE,
NOCONJG, BROC, Bm, Bn, Bd0, Bm, A, IA, JA, DESCA, AROC,
253 if( ( Brow == -1 ) || ( nprow == 1 ) )
259 TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[
LLD_], ALPHA, Bptr, &Bld );
260 if( WAfr ) free( WA );
264 if( !(
PB_Cspan( Bm, 0, Bimb1, Bmb, Brow, nprow ) ) )
269 if( ( myrow == Brow ) && ( Bnq > 0 ) )
270 TYPE->Fmmadd( &Bm, &Bnq, one, WA, &WAd[
LLD_], ALPHA, Bptr, &Bld );
271 if( WAfr ) free( WA );
283 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
288 Bnpre =
PB_Cnpreroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
289 Bnnxt =
PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
298 Bbufptr = Bbuf =
PB_Cmalloc( nlen * Bnq * size );
300 TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld,
MModSub1( myrow,
322 add =
TYPE->Fmmadd; shft =
TYPE->Frshft;
323 mydistnb = ( nprow -
MModSub( myrow, Brow, nprow ) - 1 );
324 stride = ( mydistnb *= Bmb ) * size;
328 kb =
MIN( kb, nlen );
329 add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
332 shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld );
341 TYPE->Cgesd2d( ctxt, Bnnxt, Bnq, Bbuf, Bbufld,
MModAdd1( myrow,
350 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
356 if( Bnpre > 0 ) free( Bbuf );
358 if( WAfr ) free( WA );
362 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
367 Bnnxt =
PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, myrow, Brow, nprow );
368 BnnxtL =
PB_Cnnxtroc( Bm, 0, Bimb1, Bmb, Bcurrow, Brow, nprow );
369 Bnnxt =
MModSub( Bnnxt, BnnxtL, Bm );
370 Bnpre = ( nlen = Bm - Bnnxt ) - Bmp;
378 Bbufptr = Bbuf =
PB_Cmalloc( nlen * Bnq * size );
380 TYPE->Cgerv2d( ctxt, nlen, Bnq, Bbuf, Bbufld,
MModAdd1( myrow,
400 add =
TYPE->Fmmadd; shft =
TYPE->Frshft;
401 mydist =
MModSub( Bcurrow, myrow, nprow );
402 srcdist =
MModSub( Bcurrow, Brow, nprow );
403 stridenb = ( nprow - mydist - 1 ) * Bmb;
405 if( mydist < srcdist )
407 tmp = ( Bimb1 + ( srcdist - mydist - 1 ) * Bmb );
408 Bbufptr += tmp * size;
412 else if( mydist == srcdist )
418 Bbufptr += stridenb * size;
425 kb =
MIN( kb, nlen );
426 add( &kb, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
429 shft( &nlen, &Bnq, &offset, Bbufptr, &Bbufld );
431 Bbufptr += stridenb*size;
438 TYPE->Cgesd2d( ctxt, Bnpre, Bnq, Bbuf, Bbufld,
MModSub1( myrow,
447 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
453 if( Bnnxt > 0 ) free( Bbuf );
455 if( WAfr ) free( WA );
463 if( Afwd ) { Bcurcol = Bcol; }
464 else { Bcurcol =
PB_Cindxg2p( Bn-1, Binb1, Bnb, Bcol, Bcol, npcol ); }
465 PB_Cdescset( Bd0, Bm, Bn, Bimb1, Bn, Bmb, Bnb, Brow, Bcurcol, ctxt, Bld );
469 PB_CInV(
TYPE,
NOCONJG, BROC, Bm, Bn, Bd0, Bn, A, IA, JA, DESCA, AROC,
474 if( ( Bcol == -1 ) || ( npcol == 1 ) )
480 TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[
LLD_], ALPHA, Bptr, &Bld );
481 if( WAfr ) free( WA );
485 if( !(
PB_Cspan( Bn, 0, Binb1, Bnb, Bcol, npcol ) ) )
490 if( ( mycol == Bcol ) && ( Bmp > 0 ) )
491 TYPE->Fmmadd( &Bmp, &Bn, one, WA, &WAd[
LLD_], ALPHA, Bptr, &Bld );
492 if( WAfr ) free( WA );
505 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
510 Bnpre =
PB_Cnpreroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
511 Bnnxt =
PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
520 Bbufptr = Bbuf =
PB_Cmalloc( Bmp * nlen * size );
522 TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow,
545 add =
TYPE->Fmmadd; shft =
TYPE->Fcshft;
546 mydistnb = ( npcol -
MModSub( mycol, Bcol, npcol ) - 1 );
547 stride = ( mydistnb *= Bnb ) * Bbufld * size;
551 kb =
MIN( kb, nlen );
552 add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
555 shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld );
564 TYPE->Cgesd2d( ctxt, Bmp, Bnnxt, Bbuf, Bbufld, myrow,
573 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
579 if( Bnpre > 0 ) free( Bbuf );
581 if( WAfr ) free( WA );
585 if( ( Bmp > 0 ) && ( Bnq > 0 ) )
590 Bnnxt =
PB_Cnnxtroc( Bn, 0, Binb1, Bnb, mycol, Bcol, npcol );
591 BnnxtL =
PB_Cnnxtroc( Bn, 0, Binb1, Bnb, Bcurcol, Bcol, npcol );
592 Bnnxt =
MModSub( Bnnxt, BnnxtL, Bn );
593 Bnpre = ( nlen = Bn - Bnnxt ) - Bnq;
601 Bbufptr = Bbuf =
PB_Cmalloc( Bmp * nlen * size );
603 TYPE->Cgerv2d( ctxt, Bmp, nlen, Bbuf, Bbufld, myrow,
624 add =
TYPE->Fmmadd; shft =
TYPE->Fcshft;
625 mydist =
MModSub( Bcurcol, mycol, npcol );
626 srcdist =
MModSub( Bcurcol, Bcol, npcol );
627 stridenb = ( npcol - mydist - 1 ) * Bnb;
629 if( mydist < srcdist )
631 tmp = ( Binb1 + ( srcdist - mydist - 1 ) * Bnb );
632 Bbufptr += tmp * Bbufld * size;
636 else if( mydist == srcdist )
642 Bbufptr += stridenb * Bbufld * size;
649 kb =
MIN( kb, nlen );
650 add( &Bmp, &kb, one, Bbufptr, &Bbufld, ALPHA, Bptr, &Bld );
653 shft( &Bmp, &nlen, &offset, Bbufptr, &Bbufld );
654 Bptr += kb * Bld * size;
655 Bbufptr += stridenb * Bbufld * size;
662 TYPE->Cgesd2d( ctxt, Bmp, Bnpre, Bbuf, Bbufld, myrow,
671 TYPE->Fmmadd( &Bmp, &Bnq, one, Bbufptr, &Bbufld, ALPHA, Bptr,
678 if( Bnnxt > 0 ) free( Bbuf );
680 if( WAfr ) free( WA );