14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANS,
char * DIAG,
int N,
int K,
char * ALPHA,
22 char * A,
int IA,
int JA,
int * DESCA,
23 char * X,
int LDX,
char * Y,
int LDY,
26 void PB_Cptrm(
TYPE, UTYP, SIDE, UPLO, TRANS, DIAG, N, K, ALPHA, A,
27 IA, JA, DESCA, X, LDX, Y, LDY, TRM )
31 char * DIAG, * SIDE, * TRANS, * UPLO;
32 int IA, JA, K, LDX, LDY, N;
249 int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq,
250 Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl,
251 IsRowRepl, Xinc, Yinc, XYii=0, XYjj=0, XYoffi=-1, XYoffj=-1,
252 XisRow, iimax, ilow, imbloc, inbloc, ioffd, ioffxy, iupp,
253 jjmax, joffd, joffxy, lcmt, lcmt00, lmbloc, lnbloc, low,
254 lower, m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc,
255 nblkd, nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper;
271 PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1,
272 &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol );
277 if( ( Amp <= 0 ) || ( Anq <= 0 ) )
return;
279 IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) );
280 IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) );
281 Amb = DESCA[
MB_ ]; Anb = DESCA[
NB_ ]; Ald = DESCA[
LLD_ ];
284 if( IsRowRepl && IsColRepl )
286 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, Amp, Anq, K, 0, ALPHA,
287 Mptr( A, Aii, Ajj, Ald, size ), Ald, X, LDX, Y, LDY );
294 { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->
size; }
295 else { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->
size; }
300 { XisRow = 0; Xinc = size; Yinc = LDY * UTYP->
size; }
301 else { XisRow = 1; Xinc = LDX * size; Yinc = UTYP->
size; }
309 PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00,
310 &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low,
313 iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp );
314 jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq );
315 pmb = ( IsRowRepl ? Amb : nprow * Amb );
316 qnb = ( IsColRepl ? Anb : npcol * Anb );
322 GoSouth = ( lcmt00 > iupp );
323 GoEast = ( lcmt00 < ilow );
330 if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
335 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA,
336 Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
343 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
352 if( upper && ( Anq > inbloc ) )
355 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
356 Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald,
357 X+(XYjj+inbloc)*Xinc, LDX, Y+XYii*Yinc, LDY );
359 Aii += imbloc; XYii += imbloc; m1 -= imbloc;
368 if( lower && ( Amp > imbloc ) )
371 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
372 Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald, X+XYjj*Xinc,
373 LDX, Y+(XYii+imbloc)*Yinc, LDY );
375 Ajj += inbloc; XYjj += inbloc; n1 -= inbloc;
385 lcmt00 -= ( iupp - upp + pmb ); mblks--;
386 Aoffi += imbloc; XYoffi += imbloc;
391 while( ( mblks > 0 ) && ( lcmt00 > upp ) )
392 { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
397 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
398 if( upper && ( tmp1 > 0 ) )
400 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
401 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
402 LDX, Y+XYii*Yinc, LDY );
403 Aii += tmp1; XYii += tmp1; m1 -= tmp1;
408 if( mblks <= 0 )
return;
415 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
418 while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
423 if( mblkd == 1 ) mbloc = lmbloc;
424 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt,
425 ALPHA,
Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
426 X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
427 lcmt00 = lcmt; lcmt -= pmb;
428 mblks = mblkd; mblkd--;
429 Aoffi = ioffd; XYoffi = ioffxy;
430 ioffd += mbloc; ioffxy += mbloc;
435 tmp1 = m1 - ioffd + Aii - 1;
436 if( lower && ( tmp1 > 0 ) )
438 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
439 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
440 X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
442 tmp1 = Aoffi - Aii + 1;
445 lcmt00 += low - ilow + qnb;
452 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
454 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
455 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
456 LDX, Y+XYii*Yinc, LDY );
458 Aii = Aoffi + 1; Ajj = Aoffj + 1;
459 XYii = XYoffi + 1; XYjj = XYoffj + 1;
467 lcmt00 += low - ilow + qnb; nblks--;
468 Aoffj += inbloc; XYoffj += inbloc;
473 while( ( nblks > 0 ) && ( lcmt00 < low ) )
474 { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; }
478 tmp1 =
MIN( Aoffj, jjmax ) - Ajj + 1;
479 if( lower && ( tmp1 > 0 ) )
481 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
482 Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
484 Ajj += tmp1; XYjj += tmp1; n1 -= tmp1;
489 if( nblks <= 0 )
return;
496 lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj;
499 while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
504 if( nblkd == 1 ) nbloc = lnbloc;
505 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt,
506 ALPHA,
Mptr( A, Aii, joffd+1, Ald, size ), Ald,
507 X+(joffxy+1)*Xinc, LDX, Y+XYii*Yinc, LDY );
508 lcmt00 = lcmt; lcmt += qnb;
509 nblks = nblkd; nblkd--;
510 Aoffj = joffd; XYoffj = joffxy;
511 joffd += nbloc; joffxy += nbloc;
516 tmp1 = n1 - joffd + Ajj - 1;
517 if( upper && ( tmp1 > 0 ) )
519 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
520 Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+(joffxy+1)*Xinc,
521 LDX, Y+XYii*Yinc, LDY );
523 tmp1 = Aoffj - Ajj + 1;
526 lcmt00 -= ( iupp - upp + pmb );
533 if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
535 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
536 Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+XYjj*Xinc, LDX,
537 Y+(XYoffi+1)*Yinc, LDY );
539 Aii = Aoffi + 1; Ajj = Aoffj + 1;
540 XYii = XYoffi + 1; XYjj = XYoffj + 1;
548 if( nblks == 1 ) nbloc = lnbloc;
553 while( ( mblks > 0 ) && ( lcmt00 > upp ) )
554 { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
558 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
559 if( upper && ( tmp1 > 0 ) )
561 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
562 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
563 LDX, Y+XYii*Yinc, LDY );
571 if( mblks <= 0 )
return;
578 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
581 while( ( mblkd > 0 ) && ( lcmt >= low ) )
586 if( mblkd == 1 ) mbloc = lmbloc;
587 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt,
588 ALPHA,
Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
589 X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
590 lcmt00 = lcmt; lcmt -= pmb;
591 mblks = mblkd; mblkd--;
592 Aoffi = ioffd; XYoffi = ioffxy;
593 ioffd += mbloc; ioffxy += mbloc;
598 tmp1 = m1 - ioffd + Aii - 1;
599 if( lower && ( tmp1 > 0 ) )
601 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA,
602 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
603 X+(XYoffj+1)*Xinc, LDX, Y+(ioffxy+1)*Yinc, LDY );
606 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
616 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
618 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
619 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+(XYoffj+1)*Xinc,
620 LDX, Y+XYii*Yinc, LDY );
622 Aii = Aoffi + 1; Ajj = Aoffj + 1;
623 XYii = XYoffi + 1; XYjj = XYoffj + 1;
631 if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
636 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, inbloc, K, lcmt00, ALPHA,
637 Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX,
644 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
653 if( upper && ( Anq > inbloc ) )
656 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
657 Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald, X+XYii*Xinc,
658 LDX, Y+(XYjj+inbloc)*Yinc, LDY );
660 Aii += imbloc; XYii += imbloc; m1 -= imbloc;
669 if( lower && ( Amp > imbloc ) )
672 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
673 Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald,
674 X+(XYii+imbloc)*Xinc, LDX, Y+XYjj*Yinc, LDY );
676 Ajj += inbloc; XYjj += inbloc; n1 -= inbloc;
686 lcmt00 -= ( iupp - upp + pmb ); mblks--;
687 Aoffi += imbloc; XYoffi += imbloc;
692 while( ( mblks > 0 ) && ( lcmt00 > upp ) )
693 { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
698 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
699 if( upper && ( tmp1 > 0 ) )
701 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
702 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
703 Y+(XYoffj+1)*Yinc, LDY );
704 Aii += tmp1; XYii += tmp1; m1 -= tmp1;
709 if( mblks <= 0 )
return;
716 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
719 while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
724 if( mblkd == 1 ) mbloc = lmbloc;
725 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, inbloc, K, lcmt,
726 ALPHA,
Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
727 X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
728 lcmt00 = lcmt; lcmt -= pmb;
729 mblks = mblkd; mblkd--;
730 Aoffi = ioffd; XYoffi = ioffxy;
731 ioffd += mbloc; ioffxy += mbloc;
736 tmp1 = m1 - ioffd + Aii - 1;
737 if( lower && ( tmp1 > 0 ) )
739 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, inbloc, K, 0, ALPHA,
740 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
741 X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
743 tmp1 = Aoffi - Aii + 1;
746 lcmt00 += low - ilow + qnb;
753 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
755 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
756 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
757 Y+(XYoffj+1)*Yinc, LDY );
759 Aii = Aoffi + 1; Ajj = Aoffj + 1;
760 XYii = XYoffi + 1; XYjj = XYoffj + 1;
768 lcmt00 += low - ilow + qnb; nblks--;
769 Aoffj += inbloc; XYoffj += inbloc;
774 while( ( nblks > 0 ) && ( lcmt00 < low ) )
775 { lcmt00 += qnb; nblks--; Aoffj += Anb; XYoffj += Anb; }
779 tmp1 =
MIN( Aoffj, jjmax ) - Ajj + 1;
780 if( lower && ( tmp1 > 0 ) )
782 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
783 Mptr( A, Aii, Ajj, Ald, size ), Ald, X+XYii*Xinc, LDX,
785 Ajj += tmp1; XYjj += tmp1; n1 -= tmp1;
790 if( nblks <= 0 )
return;
797 lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffxy = XYoffj;
800 while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
805 if( nblkd == 1 ) nbloc = lnbloc;
806 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, imbloc, nbloc, K, lcmt,
807 ALPHA,
Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc,
808 LDX, Y+(joffxy+1)*Yinc, LDY );
809 lcmt00 = lcmt; lcmt += qnb;
810 nblks = nblkd; nblkd--;
811 Aoffj = joffd; XYoffj = joffxy;
812 joffd += nbloc; joffxy += nbloc;
817 tmp1 = n1 - joffd + Ajj - 1;
818 if( upper && ( tmp1 > 0 ) )
820 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, imbloc, tmp1, K, 0, ALPHA,
821 Mptr( A, Aii, joffd+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
822 Y+(joffxy+1)*Yinc, LDY );
824 tmp1 = Aoffj - Ajj + 1;
827 lcmt00 -= ( iupp - upp + pmb );
834 if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
836 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, m1, tmp1, K, 0, ALPHA,
837 Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald, X+(XYoffi+1)*Xinc,
838 LDX, Y+XYjj*Yinc, LDY );
840 Aii = Aoffi + 1; Ajj = Aoffj + 1;
841 XYii = XYoffi + 1; XYjj = XYoffj + 1;
849 if( nblks == 1 ) nbloc = lnbloc;
854 while( ( mblks > 0 ) && ( lcmt00 > upp ) )
855 { lcmt00 -= pmb; mblks--; Aoffi += Amb; XYoffi += Amb; }
859 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
860 if( upper && ( tmp1 > 0 ) )
862 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
863 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
864 Y+(XYoffj+1)*Yinc, LDY );
872 if( mblks <= 0 )
return;
879 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffxy = XYoffi;
882 while( ( mblkd > 0 ) && ( lcmt >= low ) )
887 if( mblkd == 1 ) mbloc = lmbloc;
888 TRM(
TYPE, SIDE, UPLO, TRANS, DIAG, mbloc, nbloc, K, lcmt,
889 ALPHA,
Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
890 X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
891 lcmt00 = lcmt; lcmt -= pmb;
892 mblks = mblkd; mblkd--;
893 Aoffi = ioffd; XYoffi = ioffxy;
894 ioffd += mbloc; ioffxy += mbloc;
899 tmp1 = m1 - ioffd + Aii - 1;
900 if( lower && ( tmp1 > 0 ) )
902 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, nbloc, K, 0, ALPHA,
903 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald,
904 X+(ioffxy+1)*Xinc, LDX, Y+(XYoffj+1)*Yinc, LDY );
907 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
917 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
919 TRM(
TYPE, SIDE,
ALL, TRANS, DIAG, tmp1, n1, K, 0, ALPHA,
920 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald, X+XYii*Xinc, LDX,
921 Y+(XYoffj+1)*Yinc, LDY );
923 Aii = Aoffi + 1; Ajj = Aoffj + 1;
924 XYii = XYoffi + 1; XYjj = XYoffj + 1;