14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * UNPA,
char * TRANS,
int MN,
int K,
22 char * ALPHA,
char * A,
int LDA,
23 char * BETA,
char * B,
int LDB )
25 int PB_CVMpack(
TYPE, VM, VROCS, ROCS, UNPA, TRANS, MN, K, ALPHA, A,
35 char * VROCS, * ROCS, * UNPA, * TRANS;
136 int GoEast, GoSouth, ilow, imbloc, inbloc, inca, incb, iupp, kb,
137 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
138 * m, * n, nb, nblkd, nblks, nbloc, notran, npcol, npq=0,
139 nprow, pmb, qnb, rows, size, tmp1, tmp2, upp;
150 if( ( mblks == 0 ) || ( nblks == 0 ) )
return( 0 );
170 notran = 1; add =
TYPE->Fmmadd;
177 notran = 1; add =
TYPE->Fmmcadd;
184 notran = 0; add =
TYPE->Fmmtadd;
191 notran = 0; add =
TYPE->Fmmtcadd;
204 notran = 1; add =
TYPE->Fmmdda;
211 notran = 1; add =
TYPE->Fmmddac;
218 notran = 0; add =
TYPE->Fmmddat;
225 notran = 0; add =
TYPE->Fmmddact;
243 incb = ( notran ? size : LDB * size );
253 incb = ( notran ? LDB * size : size );
263 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
264 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
268 npq = ( ( mblks < 2 ) ? imbloc :
269 imbloc + ( mblks - 2 ) * mb + lmbloc );
270 npq =
MIN( npq, kb );
271 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
272 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
283 GoSouth = ( lcmt00 > iupp );
284 GoEast = ( lcmt00 < ilow );
286 if( !( GoSouth ) && !( GoEast ) )
293 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
294 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
295 add( m, n, ALPHA, A+lcmt00*inca, &LDA, BETA, B, &LDB );
299 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
300 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
301 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
303 if( ( kb -= tmp2 ) == 0 )
return( npq );
310 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
319 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
324 while( mblks && ( lcmt00 > upp ) )
325 { lcmt00 -= pmb; mblks--; A += mb * inca; }
329 if( mblks <= 0 )
return( npq );
336 lcmt = lcmt00; mblkd = mblks; aptrd = A;
338 while( mblkd && ( lcmt >= ilow ) )
343 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
346 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
347 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
348 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
352 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
353 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
354 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
356 if( ( kb -= tmp2 ) == 0 )
return( npq );
360 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
365 lcmt00 += low - ilow + qnb; nblks--;
373 lcmt00 += low - ilow + qnb; nblks--;
379 while( nblks && ( lcmt00 < low ) ) { lcmt00 += qnb; nblks--; }
383 if( nblks <= 0 )
return( npq );
389 lcmt = lcmt00; nblkd = nblks;
391 while( nblkd && ( lcmt <= iupp ) )
396 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
399 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
400 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
401 add( m, n, ALPHA, A+lcmt*inca, &LDA, BETA, B, &LDB );
405 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
406 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
407 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
409 if( ( kb -= tmp2 ) == 0 )
return( npq );
413 lcmt += qnb; nblkd--; B += tmp2 * incb;
418 lcmt00 -= iupp - upp + pmb; mblks--; A += imbloc * inca;
429 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
431 while( mblks && nblks )
433 while( mblks && ( lcmt00 > upp ) )
434 { lcmt00 -= pmb; mblks--; A += mb*inca; }
435 if( lcmt00 >= low )
break;
436 while( nblks && ( lcmt00 < low ) )
437 { lcmt00 += qnb; nblks--; }
438 if( lcmt00 <= upp )
break;
441 if( !( mblks ) || !( nblks ) )
return( npq );
447 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
448 lcmt = lcmt00; mblkd = mblks; aptrd = A;
450 while( mblkd && ( lcmt >= low ) )
455 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
458 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
459 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
460 add( m, n, ALPHA, aptrd+lcmt*inca, &LDA, BETA, B, &LDB );
464 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
465 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
466 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
468 if( ( kb -= tmp2 ) == 0 )
return( npq );
472 lcmt -= pmb; mblkd--; aptrd += mbloc * inca; B += tmp2 * incb;
477 lcmt00 += qnb; nblks--;
481 }
while( nblks > 0 );
498 incb = ( notran ? size : LDB * size );
508 incb = ( notran ? LDB * size : size );
518 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
519 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
523 npq = ( ( nblks < 2 ) ? inbloc :
524 inbloc + ( nblks - 2 ) * nb + lnbloc );
525 npq =
MIN( npq, kb );
526 if( rows ) add( &npq, &K, ALPHA, A, &LDA, BETA, B, &LDB );
527 else add( &K, &npq, ALPHA, A, &LDA, BETA, B, &LDB );
538 GoSouth = ( lcmt00 > iupp );
539 GoEast = ( lcmt00 < ilow );
541 if( !( GoSouth ) && !( GoEast ) )
548 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
549 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
550 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
554 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
555 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
556 add( m, n, ALPHA, A-lcmt00*inca, &LDA, BETA, B, &LDB );
558 if( ( kb -= tmp2 ) == 0 )
return( npq );
565 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
574 lcmt00 -= iupp - upp + pmb; mblks--;
579 while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= pmb; mblks--; }
583 if( mblks <= 0 )
return( npq );
590 lcmt = lcmt00; mblkd = mblks;
592 while( mblkd && ( lcmt >= ilow ) )
597 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
600 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
601 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
602 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
606 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
607 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
608 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
610 if( ( kb -= tmp2 ) == 0 )
return( npq );
614 lcmt -= pmb; mblkd--; B += tmp2 * incb;
619 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
627 lcmt00 += low - ilow + qnb; nblks--; A += inbloc * inca;
633 while( nblks && ( lcmt00 < low ) )
634 { lcmt00 += qnb; nblks--; A += nb * inca; }
638 if( nblks <= 0 )
return( npq );
644 lcmt = lcmt00; nblkd = nblks; aptrd = A;
646 while( nblkd && ( lcmt <= iupp ) )
651 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
654 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
655 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
656 add( m, n, ALPHA, aptrd, &LDA, BETA, B, &LDB );
660 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
661 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
662 add( m, n, ALPHA, aptrd-lcmt*inca, &LDA, BETA, B, &LDB );
664 if( ( kb -= tmp2 ) == 0 )
return( npq );
668 lcmt += qnb; nblkd--; aptrd += nbloc * inca; B += tmp2 * incb;
673 lcmt00 -= iupp - upp + pmb; mblks--;
684 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
686 while( mblks && nblks )
688 while( mblks && ( lcmt00 > upp ) )
689 { lcmt00 -= pmb; mblks--; }
690 if( lcmt00 >= low )
break;
691 while( nblks && ( lcmt00 < low ) )
692 { lcmt00 += qnb; nblks--; A += nb*inca; }
693 if( lcmt00 <= upp )
break;
696 if( !( mblks ) || !( nblks ) )
return( npq );
702 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
703 lcmt = lcmt00; mblkd = mblks;
705 while( mblkd && ( lcmt >= low ) )
710 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
713 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
714 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
715 add( m, n, ALPHA, A, &LDA, BETA, B, &LDB );
719 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
720 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
721 add( m, n, ALPHA, A-lcmt*inca, &LDA, BETA, B, &LDB );
723 if( ( kb -= tmp2 ) == 0 )
return( npq );
727 lcmt -= pmb; mblkd--; B += tmp2 * incb;
732 lcmt00 += qnb; nblks--; A += nbloc * inca;
736 }
while( nblks > 0 );