14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * SIDE,
char * UPLO,
int M,
int N,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
char * B,
23 int IB,
int JB,
int * DESCB,
char * BETA,
char * C,
24 int IC,
int JC,
int * DESCC )
26 void PB_CpsymmAB(
TYPE, DIRECAB, CONJUG, SIDE, UPLO, M, N, ALPHA, A, IA,
27 JA, DESCA, B, IB, JB, DESCB, BETA, C, IC, JC, DESCC )
31 char * CONJUG, * DIRECAB, * SIDE, * UPLO;
32 int IA, IB, IC, JA, JB, JC, M, N;
38 int * DESCA, * DESCB, * DESCC;
282 char GatherDir, ScatterDir, * one, top, tran, * zero;
283 int Afr, An, Bcol, Bcurcol, Bcurimb1, Bcurinb1, Bcurrow, Bfr, Bii,
284 Bimb, Bimb1, Binb, Binb1, Bjj, Bld, Bmb, Bmp, Bmp0, Bnb, Bnq,
285 Bnq0, Brow, Ccol, Ccurcol, Ccurimb1, Ccurinb1, Ccurrow, Cii,
286 Cimb, Cimb1, Cinb, Cinb1, Cjj, Cld, Cmb, Cmp, Cmp0, Cnb, Cnq,
287 Cnq0, Crow, WABfr, WACfr, WBCfr, WBCsum, conjg, ctxt, fwd, k,
288 kb, kbb, kend, kstart, kstep, ktmp, lside, mycol, myrow,
289 npcol, nprow, size, upper;
297 char * Aptr = NULL, * Bptr = NULL, * Bptr0 = NULL, * Cptr0 = NULL,
298 * WAB = NULL, * WAC = NULL, * WBC = NULL;
312 An = ( ( lside = (
Mupcase( SIDE[0] ) ==
CLEFT ) ) ? M : N );
317 gemm =
TYPE->Fgemm; gsum2d =
TYPE->Cgsum2d;
324 kstart = 0; kend = ( ( An - 1 ) / kb + 1 ) * kb; kstep = kb;
329 kstart = ( ( An - 1 ) / kb ) * kb; kend = kstep = -kb;
335 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
337 Bimb = DESCB[
IMB_]; Binb = DESCB[
INB_];
338 Bmb = DESCB[
MB_ ]; Bnb = DESCB[
NB_ ]; Bld = DESCB[
LLD_];
340 Bmp0 =
PB_Cnumroc( M, 0, Bimb1, Bmb, myrow, Brow, nprow );
342 Bnq0 =
PB_Cnumroc( N, 0, Binb1, Bnb, mycol, Bcol, npcol );
343 if( ( Bmp0 > 0 ) && ( Bnq0 > 0 ) ) Bptr0 =
Mptr( B, Bii, Bjj, Bld, size );
345 PB_Cinfog2l( IC, JC, DESCC, nprow, npcol, myrow, mycol, &Cii, &Cjj,
347 Cimb = DESCC[
IMB_]; Cinb = DESCC[
INB_];
348 Cmb = DESCC[
MB_ ]; Cnb = DESCC[
NB_ ]; Cld = DESCC[
LLD_];
350 Cmp0 =
PB_Cnumroc( M, 0, Cimb1, Cmb, myrow, Crow, nprow );
352 Cnq0 =
PB_Cnumroc( N, 0, Cinb1, Cnb, mycol, Ccol, npcol );
353 if( ( Cmp0 > 0 ) && ( Cnq0 > 0 ) ) Cptr0 =
Mptr( C, Cii, Cjj, Cld, size );
361 for( k = kstart; k != kend; k += kstep )
363 kbb = An - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
368 DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
373 PB_Cdescset( Cd0, ktmp, N, Cimb1, Cinb1, Cmb, Cnb, Crow, Ccol,
375 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Cd0, kbb, Aptr, 0, 0,
376 DBUFA,
COLUMN, &WAC, WACd, &WACfr );
382 zero, WAC, k, 0, WACd );
385 zero, WAC, k+1, 0, WACd );
390 ROW, &Bptr, DBUFB, &Bfr );
394 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, N, Cd0, kbb, Bptr, 0, 0, DBUFB,
395 ROW, &WBC, WBCd, &WBCfr );
399 Cmp =
PB_Cnumroc( ktmp, 0, Cimb1, Cmb, myrow, Crow, nprow );
400 if( ( Cmp > 0 ) && ( Cnq0 > 0 ) )
402 ALPHA, WAC, &WACd[
LLD_], WBC, &WBCd[
LLD_], one, Cptr0,
404 if( WBCfr ) free( WBC );
405 if( Bfr ) free( Bptr );
410 PB_Cdescset( Bd0, ktmp, N, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol,
412 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Bd0, kbb, WAC, 0, 0, WACd,
413 COLUMN, &WAB, WABd, &WABfr );
417 PB_Cplapad(
TYPE,
LOWER,
NOCONJG, kbb, kbb, zero, zero, WAB, k, 0,
422 PB_COutV(
TYPE,
ROW,
INIT, ktmp, N, Bd0, kbb, &WBC, WBCd, &WBCfr,
424 Bmp =
PB_Cnumroc( ktmp, 0, Bimb1, Bmb, myrow, Brow, nprow );
425 if( ( Bnq0 > 0 ) && ( Bmp > 0 ) )
427 ALPHA, WAB, &WABd[
LLD_], Bptr0, &Bld, zero, WBC,
429 if( WABfr ) free( WAB );
430 if( WACfr ) free( WAC );
431 if( Afr ) free( Aptr );
436 Cmb, Crow, Crow, nprow );
438 gsum2d( ctxt,
COLUMN, &top, kbb, Bnq0, WBC, WBCd[
LLD_],
439 WBCd[
RSRC_], mycol );
444 PB_CScatterV(
TYPE, &ScatterDir, kbb, N, WBC, 0, 0, WBCd,
ROW, one,
445 C, IC+k, JC, DESCC,
ROW );
446 if( WBCfr ) free( WBC );
451 for( k = kstart; k != kend; k += kstep )
453 ktmp = An - k; kbb =
MIN( ktmp, kb );
458 DESCA,
COLUMN, &Aptr, DBUFA, &Afr );
463 Ccurrow =
PB_Cindxg2p( k, Cimb1, Cmb, Crow, Crow, nprow );
464 PB_Cdescset( Cd0, ktmp, N, Ccurimb1, Cinb1, Cmb, Cnb, Ccurrow,
466 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Cd0, kbb, Aptr, 0, 0,
467 DBUFA,
COLUMN, &WAC, WACd, &WACfr );
481 ROW, &Bptr, DBUFB, &Bfr );
485 PB_CInV(
TYPE,
NOCONJG,
ROW, ktmp, N, Cd0, kbb, Bptr, 0, 0, DBUFB,
486 ROW, &WBC, WBCd, &WBCfr );
490 Cmp =
PB_Cnumroc( ktmp, k, Cimb1, Cmb, myrow, Crow, nprow );
491 if( ( Cmp > 0 ) && ( Cnq0 > 0 ) )
493 ALPHA, WAC, &WACd[
LLD_], WBC, &WBCd[
LLD_], one,
494 Mptr( Cptr0, Cmp0-Cmp, 0, Cld, size ), &Cld );
495 if( WBCfr ) free( WBC );
496 if( Bfr ) free( Bptr );
502 Bcurrow =
PB_Cindxg2p( k, Bimb1, Bmb, Brow, Brow, nprow );
503 PB_Cdescset( Bd0, ktmp, N, Bcurimb1, Binb1, Bmb, Bnb, Bcurrow,
505 PB_CInV(
TYPE,
NOCONJG,
COLUMN, ktmp, N, Bd0, kbb, WAC, 0, 0, WACd,
506 COLUMN, &WAB, WABd, &WABfr );
510 PB_Cplapad(
TYPE,
UPPER,
NOCONJG, kbb, kbb, zero, zero, WAB, 0, 0,
515 PB_COutV(
TYPE,
ROW,
INIT, ktmp, N, Bd0, kbb, &WBC, WBCd, &WBCfr,
517 Bmp =
PB_Cnumroc( ktmp, k, Bimb1, Bmb, myrow, Brow, nprow );
518 if( ( Bnq0 > 0 ) && ( Bmp > 0 ) )
520 ALPHA, WAB, &WABd[
LLD_],
Mptr( Bptr0, Bmp0-Bmp, 0, Bld,
521 size ), &Bld, zero, WBC, &WBCd[
LLD_] );
522 if( WABfr ) free( WAB );
523 if( WACfr ) free( WAC );
524 if( Afr ) free( Aptr );
529 Cmb, Crow, Crow, nprow );
531 gsum2d( ctxt,
COLUMN, &top, kbb, Bnq0, WBC, WBCd[
LLD_],
532 WBCd[
RSRC_], mycol );
537 PB_CScatterV(
TYPE, &ScatterDir, kbb, N, WBC, 0, 0, WBCd,
ROW, one,
538 C, IC+k, JC, DESCC,
ROW );
539 if( WBCfr ) free( WBC );
549 for( k = kstart; k != kend; k += kstep )
551 ktmp = An - k; kbb =
MIN( ktmp, kb );
556 DESCA,
ROW, &Aptr, DBUFA, &Afr );
561 Ccurcol =
PB_Cindxg2p( k, Cinb1, Cnb, Ccol, Ccol, npcol );
562 PB_Cdescset( Cd0, M, ktmp, Cimb1, Ccurinb1, Cmb, Cnb, Crow, Ccurcol,
564 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Cd0, kbb, Aptr, 0, 0, DBUFA,
565 ROW, &WAC, WACd, &WACfr );
571 zero, WAC, 0, 0, WACd );
574 zero, WAC, 1, 0, WACd );
579 COLUMN, &Bptr, DBUFB, &Bfr );
583 PB_CInV(
TYPE,
NOCONJG,
COLUMN, M, ktmp, Cd0, kbb, Bptr, 0, 0,
584 DBUFB,
COLUMN, &WBC, WBCd, &WBCfr );
588 Cnq =
PB_Cnumroc( ktmp, k, Cinb1, Cnb, mycol, Ccol, npcol );
589 if( ( Cmp0 > 0 ) && ( Cnq > 0 ) )
591 ALPHA, WBC, &WBCd[
LLD_], WAC, &WACd[
LLD_], one,
592 Mptr( Cptr0, 0, Cnq0-Cnq, Cld, size ), &Cld );
593 if( WBCfr ) free( WBC );
594 if( Bfr ) free( Bptr );
600 Bcurcol =
PB_Cindxg2p( k, Binb1, Bnb, Bcol, Bcol, npcol );
601 PB_Cdescset( Bd0, M, ktmp, Bimb1, Bcurinb1, Bmb, Bnb, Brow, Bcurcol,
603 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Bd0, kbb, WAC, 0, 0, WACd,
604 ROW, &WAB, WABd, &WABfr );
608 PB_Cplapad(
TYPE,
LOWER,
NOCONJG, kbb, kbb, zero, zero, WAB, 0, 0,
615 Bnq =
PB_Cnumroc( ktmp, k, Binb1, Bnb, mycol, Bcol, npcol );
616 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
618 ALPHA,
Mptr( Bptr0, 0, Bnq0-Bnq, Bld, size ), &Bld, WAB,
619 &WABd[
LLD_], zero, WBC, &WBCd[
LLD_] );
620 if( WABfr ) free( WAB );
621 if( WACfr ) free( WAC );
622 if( Afr ) free( Aptr );
627 Cnb, Ccol, Ccol, npcol );
629 gsum2d( ctxt,
ROW, &top, Bmp0, kbb, WBC, WBCd[
LLD_], myrow,
636 one, C, IC, JC+k, DESCC,
COLUMN );
637 if( WBCfr ) free( WBC );
642 for( k = kstart; k != kend; k += kstep )
644 kbb = An - k; kbb =
MIN( kbb, kb ); ktmp = k + kbb;
649 DESCA,
ROW, &Aptr, DBUFA, &Afr );
654 PB_Cdescset( Cd0, M, ktmp, Cimb1, Cinb1, Cmb, Cnb, Crow, Ccol, ctxt,
656 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Cd0, kbb, Aptr, 0, 0, DBUFA,
657 ROW, &WAC, WACd, &WACfr );
671 COLUMN, &Bptr, DBUFB, &Bfr );
675 PB_CInV(
TYPE,
NOCONJG,
COLUMN, M, ktmp, Cd0, kbb, Bptr, 0, 0,
676 DBUFB,
COLUMN, &WBC, WBCd, &WBCfr );
680 Cnq =
PB_Cnumroc( ktmp, 0, Cinb1, Cnb, mycol, Ccol, npcol );
681 if( ( Cmp0 > 0 ) && ( Cnq > 0 ) )
683 ALPHA, WBC, &WBCd[
LLD_], WAC, &WACd[
LLD_], one, Cptr0,
685 if( WBCfr ) free( WBC );
686 if( Bfr ) free( Bptr );
691 PB_Cdescset( Bd0, M, ktmp, Bimb1, Binb1, Bmb, Bnb, Brow, Bcol, ctxt,
693 PB_CInV(
TYPE,
NOCONJG,
ROW, M, ktmp, Bd0, kbb, WAC, 0, 0, WACd,
694 ROW, &WAB, WABd, &WABfr );
698 PB_Cplapad(
TYPE,
UPPER,
NOCONJG, kbb, kbb, zero, zero, WAB, 0, k,
703 PB_COutV(
TYPE,
COLUMN,
INIT, M, ktmp, Bd0, kbb, &WBC, WBCd, &WBCfr,
705 Bnq =
PB_Cnumroc( ktmp, 0, Binb1, Bnb, mycol, Bcol, npcol );
706 if( ( Bmp0 > 0 ) && ( Bnq > 0 ) )
708 ALPHA, Bptr0, &Bld, WAB, &WABd[
LLD_], zero, WBC,
710 if( WABfr ) free( WAB );
711 if( WACfr ) free( WAC );
712 if( Afr ) free( Aptr );
717 Cnb, Ccol, Ccol, npcol );
719 gsum2d( ctxt,
ROW, &top, Bmp0, kbb, WBC, WBCd[
LLD_], myrow,
726 one, C, IC, JC+k, DESCC,
COLUMN );
727 if( WBCfr ) free( WBC );