14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * VROCS,
char * ROCS,
char * UNPA,
char * TRANS,
22 int MN,
int K,
char * ALPHA,
char * A,
int LDA,
23 char * BETA,
char * B,
int LDB )
25 int PB_CVMloc(
TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
35 char * VROCS, * ROCS, * UNPA, * TRANS;
139 int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
140 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
141 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
142 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
144 char * aptrd, * bptrd;
153 if( ( mblks == 0 ) || ( nblks == 0 ) )
return( 0 );
198 add =
TYPE->Fmmtcadd;
236 add =
TYPE->Fmmddact;
254 incb = ( notran ? size : LDB * size );
264 incb = ( notran ? LDB * size : size );
274 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
275 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
279 npq = ( ( mblks < 2 ) ? imbloc :
280 imbloc + ( mblks - 2 ) * mb + lmbloc );
281 npq =
MIN( npq, kb );
282 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
283 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
294 GoSouth = ( lcmt00 > iupp );
295 GoEast = ( lcmt00 < ilow );
299 if( !( GoSouth ) && !( GoEast ) )
306 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
307 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
308 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
312 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
313 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
314 add( m, n, ALPHA, A, &LDA, BETA, B-lcmt00*incb, &LDB );
316 if( ( kb -= tmp2 ) == 0 )
return( npq );
322 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
331 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
336 while( mblks && ( lcmt00 > upp ) )
337 { lcmt00 -= pmb; mblks--; A += mb * inca; }
341 if( mblks <= 0 )
return( npq );
348 lcmt = lcmt00; mblkd = mblks; aptrd = A;
350 while( mblkd && ( lcmt >= ilow ) )
355 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
358 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
359 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
360 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
364 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
365 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
366 add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
368 if( ( kb -= tmp2 ) == 0 )
return( npq );
372 lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
377 lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
385 lcmt00 += low - ilow + qnb; nblks--; B += inbloc * incb;
391 while( nblks && ( lcmt00 < low ) )
392 { lcmt00 += qnb; nblks--; B += nb * incb; }
396 if( nblks <= 0 )
return( npq );
402 lcmt = lcmt00; nblkd = nblks; bptrd = B;
404 while( nblkd && ( lcmt <= iupp ) )
409 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
412 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
413 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
414 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, bptrd, &LDB );
418 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
419 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
420 add( m, n, ALPHA, A, &LDA, BETA, bptrd-lcmt*incb, &LDB );
422 if( ( kb -= tmp2 ) == 0 )
return( npq );
426 lcmt += qnb; nblkd--; bptrd += nbloc * incb;
431 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
442 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
444 while( mblks && nblks )
446 while( mblks && ( lcmt00 > upp ) )
447 { lcmt00 -= pmb; mblks--; A += mb * inca; }
448 if( lcmt00 >= low )
break;
449 while( nblks && ( lcmt00 < low ) )
450 { lcmt00 += qnb; nblks--; B += nb * incb; }
451 if( lcmt00 <= upp )
break;
454 if( !( mblks ) || !( nblks ) )
return( npq );
460 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
461 lcmt = lcmt00; mblkd = mblks; aptrd = A;
463 while( mblkd && ( lcmt >= low ) )
468 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
471 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
472 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
473 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
477 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
478 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
479 add( m, n, ALPHA, aptrd, &LDA, BETA, B-lcmt*incb, &LDB );
481 if( ( kb -= tmp2 ) == 0 )
return( npq );
485 lcmt -= pmb; mblkd--; aptrd += mbloc * inca;
490 lcmt00 += qnb; nblks--; B += nbloc * incb;
494 }
while( nblks > 0 );
511 incb = ( notran ? size : LDB * size );
521 incb = ( notran ? LDB * size : size );
531 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
532 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
536 npq = ( ( nblks < 2 ) ? inbloc :
537 inbloc + ( nblks - 2 ) * nb + lnbloc );
538 npq =
MIN( npq, kb );
539 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
540 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
551 GoSouth = ( lcmt00 > iupp );
552 GoEast = ( lcmt00 < ilow );
554 if( !( GoSouth ) && !( GoEast ) )
561 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
562 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
563 add( m, n, ALPHA, A, &LDA, BETA, B+lcmt00*incb, &LDB );
567 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
568 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
569 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
571 if( ( kb -= tmp2 ) == 0 )
return( npq );
577 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
586 lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
591 while( mblks && ( lcmt00 > upp ) )
592 { lcmt00 -= pmb; mblks--; B += mb * incb; }
596 if( mblks <= 0 )
return( npq );
603 lcmt = lcmt00; mblkd = mblks; bptrd = B;
605 while( mblkd && ( lcmt >= ilow ) )
610 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
613 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
614 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
615 add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
619 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
620 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
621 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
623 if( ( kb -= tmp2 ) == 0 )
return( npq );
627 lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
632 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
640 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
646 while( nblks && ( lcmt00 < low ) )
647 { lcmt00 += qnb; nblks--; A += nb * inca; }
651 if( nblks <= 0 )
return( npq );
657 lcmt = lcmt00; nblkd = nblks; aptrd = A;
659 while( nblkd && ( lcmt <= iupp ) )
664 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
667 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
668 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
669 add( m, n, ALPHA, aptrd, &LDA, BETA, B+lcmt*incb, &LDB );
673 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
674 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
675 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
677 if( ( kb -= tmp2 ) == 0 )
return( npq );
681 lcmt += qnb; nblkd--; aptrd += nbloc * inca;
686 lcmt00 -= iupp - upp + pmb; mblks--; B += imbloc * incb;
697 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
699 while( mblks && nblks )
701 while( mblks && ( lcmt00 > upp ) )
702 { lcmt00 -= pmb; mblks--; B += mb * incb; }
703 if( lcmt00 >= low )
break;
704 while( nblks && ( lcmt00 < low ) )
705 { lcmt00 += qnb; nblks--; A += nb * inca; }
706 if( lcmt00 <= upp )
break;
709 if( !( mblks ) || !( nblks ) )
return( npq );
715 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
716 lcmt = lcmt00; mblkd = mblks; bptrd = B;
718 while( mblkd && ( lcmt >= low ) )
723 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
726 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
727 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
728 add( m, n, ALPHA, A, &LDA, BETA, bptrd+lcmt*incb, &LDB );
732 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
733 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
734 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, bptrd, &LDB );
736 if( ( kb -= tmp2 ) == 0 )
return( npq );
740 lcmt -= pmb; mblkd--; bptrd += mbloc * incb;
745 lcmt00 += qnb; nblks--; A += nbloc * inca;
749 }
while( nblks > 0 );