14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * ALPHA,
char * XC,
int LDXC,
char * XR,
int LDXR,
22 char * YC,
int LDYC,
char * YR,
int LDYR,
char * A,
23 int IA,
int JA,
int * DESCA,
TZSYR2_T SYR2 )
26 YC, LDYC, YR, LDYR, A, IA, JA, DESCA, SYR2 )
31 int IA, JA, K, LDXC, LDXR, LDYC, LDYR, N;
39 char * A, * XC, * XR, * YC, * YR;
224 int Acol, Arow, Aii, Aimb1, Ainb1, Ajj, Ald, Amp, Amb, Anb, Anq,
225 Aoffi, Aoffj, Arcol, Arrow, GoEast, GoSouth, IsColRepl,
226 IsRowRepl, XCinc, XRinc, Xii=0, Xjj=0, Xoffi=-1, Xoffj=-1,
227 YCinc, YRinc, iimax, ilow, imbloc, inbloc, ioffd, ioffx, iupp,
228 jjmax, joffd, joffx, lcmt, lcmt00, lmbloc, lnbloc, low, lower,
229 m1, mbloc, mblkd, mblks, mycol, myrow, n1, nbloc, nblkd,
230 nblks, npcol, nprow, pmb, qnb, size, tmp1, upp, upper;
246 PB_Cainfog2l( N, N, IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aimb1,
247 &Ainb1, &Amp, &Anq, &Aii, &Ajj, &Arow, &Acol, &Arrow, &Arcol );
252 if( ( Amp <= 0 ) || ( Anq <= 0 ) )
return;
254 IsRowRepl = ( ( Arow < 0 ) || ( nprow == 1 ) );
255 IsColRepl = ( ( Acol < 0 ) || ( npcol == 1 ) );
256 Amb = DESCA[
MB_ ]; Anb = DESCA[
NB_ ]; Ald = DESCA[
LLD_];
259 if( IsRowRepl && IsColRepl )
261 SYR2(
TYPE, UPLO, Amp, Anq, K, 0, ALPHA, XC, LDXC, YC, LDYC, XR, LDXR,
262 YR, LDYR,
Mptr( A, Aii, Ajj, Ald, size ), Ald );
266 XCinc = size; XRinc = LDXR * size;
267 YCinc = size; YRinc = LDYR * size;
274 PB_Cbinfo( 0, Amp, Anq, Aimb1, Ainb1, Amb, Anb, Arrow, Arcol, &lcmt00,
275 &mblks, &nblks, &imbloc, &inbloc, &lmbloc, &lnbloc, &ilow, &low,
278 iimax = ( Aoffi = Aii - 1 ) + ( m1 = Amp );
279 jjmax = ( Aoffj = Ajj - 1 ) + ( n1 = Anq );
280 pmb = ( IsRowRepl ? Amb : nprow * Amb );
281 qnb = ( IsColRepl ? Anb : npcol * Anb );
287 GoSouth = ( lcmt00 > iupp );
288 GoEast = ( lcmt00 < ilow );
292 if( ( !( GoSouth ) ) && ( !( GoEast ) ) )
297 SYR2(
TYPE, UPLO, imbloc, inbloc, K, lcmt00, ALPHA,
298 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
299 XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR,
300 Mptr( A, Aii, Ajj, Ald, size ), Ald );
306 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
315 if( upper && ( Anq > inbloc ) )
318 SYR2(
TYPE,
ALL, imbloc, tmp1, K, 0, ALPHA,
319 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
320 XR+(Xjj+inbloc)*XRinc, LDXR, YR+(Xjj+inbloc)*YRinc, LDYR,
321 Mptr( A, Aii, Ajj+inbloc, Ald, size ), Ald );
323 Aii += imbloc; Xii += imbloc; m1 -= imbloc;
332 if( lower && ( Amp > imbloc ) )
335 SYR2(
TYPE,
ALL, tmp1, inbloc, K, 0, ALPHA,
336 XC+(Xii+imbloc)*XCinc, LDXC, YC+(Xii+imbloc)*YCinc, LDYC,
337 XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR,
338 Mptr( A, Aii+imbloc, Ajj, Ald, size ), Ald );
340 Ajj += inbloc; Xjj += inbloc; n1 -= inbloc;
350 lcmt00 -= ( iupp - upp + pmb ); mblks--; Aoffi += imbloc; Xoffi += imbloc;
355 while( ( mblks > 0 ) && ( lcmt00 > upp ) )
356 { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; }
360 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
361 if( upper && ( tmp1 > 0 ) )
363 SYR2(
TYPE,
ALL, tmp1, n1, K, 0, ALPHA,
364 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
365 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
366 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
367 Aii += tmp1; Xii += tmp1; m1 -= tmp1;
372 if( mblks <= 0 )
return;
379 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi;
382 while( ( mblkd > 0 ) && ( lcmt >= ilow ) )
387 if( mblkd == 1 ) mbloc = lmbloc;
388 SYR2(
TYPE, UPLO, mbloc, inbloc, K, lcmt, ALPHA,
389 XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
390 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
391 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
392 lcmt00 = lcmt; lcmt -= pmb;
393 mblks = mblkd; mblkd--;
394 Aoffi = ioffd; ioffd += mbloc;
395 Xoffi = ioffx; ioffx += mbloc;
400 tmp1 = m1 - ioffd + Aii - 1;
401 if( lower && ( tmp1 > 0 ) )
402 SYR2(
TYPE,
ALL, tmp1, inbloc, K, 0, ALPHA,
403 XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
404 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
405 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
407 tmp1 = Aoffi - Aii + 1;
410 lcmt00 += low - ilow + qnb;
417 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
418 SYR2(
TYPE,
ALL, tmp1, n1, K, 0, ALPHA,
419 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
420 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
421 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
422 Aii = Aoffi + 1; Ajj = Aoffj + 1;
423 Xii = Xoffi + 1; Xjj = Xoffj + 1;
431 lcmt00 += low - ilow + qnb; nblks--; Aoffj += inbloc; Xoffj += inbloc;
436 while( ( nblks > 0 ) && ( lcmt00 < low ) )
437 { lcmt00 += qnb; nblks--; Aoffj += Anb; Xoffj += Anb; }
441 tmp1 =
MIN( Aoffj, jjmax ) - Ajj + 1;
442 if( lower && ( tmp1 > 0 ) )
444 SYR2(
TYPE,
ALL, m1, tmp1, K, 0, ALPHA,
445 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
446 XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR,
447 Mptr( A, Aii, Ajj, Ald, size ), Ald );
448 Ajj += tmp1; Xjj += tmp1; n1 -= tmp1;
453 if( nblks <= 0 )
return;
460 lcmt = lcmt00; nblkd = nblks; joffd = Aoffj; joffx = Xoffj;
463 while( ( nblkd > 0 ) && ( lcmt <= iupp ) )
468 if( nblkd == 1 ) nbloc = lnbloc;
469 SYR2(
TYPE, UPLO, imbloc, nbloc, K, lcmt, ALPHA,
470 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
471 XR+(joffx+1)*XRinc, LDXR, YR+(joffx+1)*YRinc, LDYR,
472 Mptr( A, Aii, joffd+1, Ald, size ), Ald );
473 lcmt00 = lcmt; lcmt += qnb;
474 nblks = nblkd; nblkd--;
475 Aoffj = joffd; joffd += nbloc;
476 Xoffj = joffx; joffx += nbloc;
481 tmp1 = n1 - joffd + Ajj - 1;
482 if( upper && ( tmp1 > 0 ) )
483 SYR2(
TYPE,
ALL, imbloc, tmp1, K, 0, ALPHA,
484 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
485 XR+(joffx+1)*XRinc, LDXR, YR+(joffx+1)*YRinc, LDYR,
486 Mptr( A, Aii, joffd+1, Ald, size ), Ald );
488 tmp1 = Aoffj - Ajj + 1;
491 lcmt00 -= ( iupp - upp + pmb );
498 if( lower && ( m1 > 0 ) && ( tmp1 > 0 ) )
499 SYR2(
TYPE,
ALL, m1, tmp1, K, 0, ALPHA,
500 XC+(Xoffi+1)*XCinc, LDXC, YC+(Xoffi+1)*YCinc, LDYC,
501 XR+Xjj*XRinc, LDXR, YR+Xjj*YRinc, LDYR,
502 Mptr( A, Aoffi+1, Ajj, Ald, size ), Ald );
503 Aii = Aoffi + 1; Ajj = Aoffj + 1;
504 Xii = Xoffi + 1; Xjj = Xoffj + 1;
512 if( nblks == 1 ) nbloc = lnbloc;
517 while( ( mblks > 0 ) && ( lcmt00 > upp ) )
518 { lcmt00 -= pmb; mblks--; Aoffi += Amb; Xoffi += Amb; }
522 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
523 if( upper && ( tmp1 > 0 ) )
525 SYR2(
TYPE,
ALL, tmp1, n1, K, 0, ALPHA,
526 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
527 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
528 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
536 if( mblks <= 0 )
return;
543 lcmt = lcmt00; mblkd = mblks; ioffd = Aoffi; ioffx = Xoffi;
546 while( ( mblkd > 0 ) && ( lcmt >= low ) )
551 if( mblkd == 1 ) mbloc = lmbloc;
552 SYR2(
TYPE, UPLO, mbloc, nbloc, K, lcmt, ALPHA,
553 XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
554 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
555 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
556 lcmt00 = lcmt; lcmt -= pmb;
557 mblks = mblkd; mblkd--;
558 Aoffi = ioffd; Xoffi = ioffx;
559 ioffd += mbloc; ioffx += mbloc;
564 tmp1 = m1 - ioffd + Aii - 1;
565 if( lower && ( tmp1 > 0 ) )
566 SYR2(
TYPE,
ALL, tmp1, nbloc, K, 0, ALPHA,
567 XC+(ioffx+1)*XCinc, LDXC, YC+(ioffx+1)*YCinc, LDYC,
568 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
569 Mptr( A, ioffd+1, Aoffj+1, Ald, size ), Ald );
571 tmp1 =
MIN( Aoffi, iimax ) - Aii + 1;
581 if( upper && ( tmp1 > 0 ) && ( n1 > 0 ) )
582 SYR2(
TYPE,
ALL, tmp1, n1, K, 0, ALPHA,
583 XC+Xii*XCinc, LDXC, YC+Xii*YCinc, LDYC,
584 XR+(Xoffj+1)*XRinc, LDXR, YR+(Xoffj+1)*YRinc, LDYR,
585 Mptr( A, Aii, Aoffj+1, Ald, size ), Ald );
586 Aii = Aoffi + 1; Ajj = Aoffj + 1;
587 Xii = Xoffi + 1; Xjj = Xoffj + 1;