14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
21 char * TRANS,
int MN,
char * X,
int INCX,
char * Y,
32 char * VROCS, * ROCS, * TRANS;
112 int GoEast, GoSouth, Xinc, Yinc, ilow, imbloc, inbloc, iupp, kb,
113 lcmt, lcmt00, lmbloc, lnbloc, low, mb, mblkd, mblks, mbloc,
114 nb, nblkd, nblks, nbloc, notran, npcol, npq=0, nprow, pmb,
115 qnb, rows, size, tmp1, tmp2, upp;
116 char * Xptrd, * Yptrd;
125 if( ( mblks == 0 ) || ( nblks == 0 ) )
return( 0 );
151 Yinc = ( notran ? size : INCY * size );
159 Yinc = ( notran ? INCY * size : size );
167 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
168 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
172 npq = ( ( mblks < 2 ) ? imbloc :
173 imbloc + ( mblks - 2 ) * mb + lmbloc );
174 npq =
MIN( npq, kb );
175 if( rows )
TYPE->Fswap( &npq, X, &INCX, Y, &INCY );
176 else TYPE->Fswap( &npq, X, &INCX, Y, &INCY );
187 GoSouth = ( lcmt00 > iupp );
188 GoEast = ( lcmt00 < ilow );
190 if( !( GoSouth ) && !( GoEast ) )
197 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
198 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
199 TYPE->Fswap( &tmp2, X+lcmt00*Xinc, &INCX, Y, &INCY );
203 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
204 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
205 TYPE->Fswap( &tmp2, X, &INCX, Y-lcmt00*Yinc, &INCY );
207 if( ( kb -= tmp2 ) == 0 )
return( npq );
213 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
222 lcmt00 -= iupp - upp + pmb; mblks--; X += imbloc * Xinc;
227 while( mblks && ( lcmt00 > upp ) )
228 { lcmt00 -= pmb; mblks--; X += mb * Xinc; }
232 if( mblks <= 0 )
return( npq );
239 lcmt = lcmt00; mblkd = mblks; Xptrd = X;
241 while( mblkd && ( lcmt >= ilow ) )
246 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
249 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
250 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
251 TYPE->Fswap( &tmp2, Xptrd+lcmt*Xinc, &INCX, Y, &INCY );
255 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
256 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
257 TYPE->Fswap( &tmp2, Xptrd, &INCX, Y-lcmt*Yinc, &INCY );
259 if( ( kb -= tmp2 ) == 0 )
return( npq );
263 lcmt -= pmb; mblkd--; Xptrd += mbloc * Xinc;
268 lcmt00 += low - ilow + qnb; nblks--; Y += inbloc * Yinc;
276 lcmt00 += low - ilow + qnb; nblks--; Y += inbloc * Yinc;
282 while( nblks && ( lcmt00 < low ) )
283 { lcmt00 += qnb; nblks--; Y += nb * Yinc; }
287 if( nblks <= 0 )
return( npq );
293 lcmt = lcmt00; nblkd = nblks; Yptrd = Y;
295 while( nblkd && ( lcmt <= iupp ) )
300 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
303 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
304 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
305 TYPE->Fswap( &tmp2, X+lcmt*Xinc, &INCX, Yptrd, &INCY );
309 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
310 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
311 TYPE->Fswap( &tmp2, X, &INCX, Yptrd-lcmt*Yinc, &INCY );
313 if( ( kb -= tmp2 ) == 0 )
return( npq );
317 lcmt += qnb; nblkd--; Yptrd += nbloc * Yinc;
322 lcmt00 -= iupp - upp + pmb; mblks--; X += imbloc * Xinc;
333 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
335 while( mblks && nblks )
337 while( mblks && ( lcmt00 > upp ) )
338 { lcmt00 -= pmb; mblks--; X += mb * Xinc; }
339 if( lcmt00 >= low )
break;
340 while( nblks && ( lcmt00 < low ) )
341 { lcmt00 += qnb; nblks--; Y += nb * Yinc; }
342 if( lcmt00 <= upp )
break;
345 if( !mblks || !nblks )
return( npq );
351 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
352 lcmt = lcmt00; mblkd = mblks; Xptrd = X;
354 while( mblkd && lcmt >= low )
359 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
362 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
363 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
364 TYPE->Fswap( &tmp2, Xptrd+lcmt*Xinc, &INCX, Y, &INCY );
368 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
369 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
370 TYPE->Fswap( &tmp2, Xptrd, &INCX, Y-lcmt*Yinc, &INCY );
372 if( ( kb -= tmp2 ) == 0 )
return( npq );
376 lcmt -= pmb; mblkd--; Xptrd += mbloc * Xinc;
381 lcmt00 += qnb; nblks--; Y += nbloc * Yinc;
385 }
while( nblks > 0 );
402 Yinc = ( notran ? size : INCY * size );
410 Yinc = ( notran ? INCY * size : size );
418 if( ( ( lcmt00 == 0 ) && ( VM->
imb1 == VM->
inb1 ) && ( mb == nb ) &&
419 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
423 npq = ( ( nblks < 2 ) ? inbloc :
424 inbloc + ( nblks - 2 ) * nb + lnbloc );
425 npq =
MIN( npq, kb );
426 if( rows )
TYPE->Fswap( &npq, X, &INCX, Y, &INCY );
427 else TYPE->Fswap( &npq, X, &INCX, Y, &INCY );
438 GoSouth = ( lcmt00 > iupp );
439 GoEast = ( lcmt00 < ilow );
441 if( !( GoSouth ) && !( GoEast ) )
448 tmp1 = imbloc - lcmt00; tmp1 =
MAX( 0, tmp1 );
449 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
450 TYPE->Fswap( &tmp2, X, &INCX, Y+lcmt00*Yinc, &INCY );
454 tmp1 = inbloc + lcmt00; tmp1 =
MAX( 0, tmp1 );
455 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
456 TYPE->Fswap( &tmp2, X-lcmt00*Xinc, &INCX, Y, &INCY );
458 if( ( kb -= tmp2 ) == 0 )
return( npq );
464 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + pmb ) ) < ilow ) );
473 lcmt00 -= iupp - upp + pmb; mblks--; Y += imbloc * Yinc;
478 while( mblks && ( lcmt00 > upp ) )
479 { lcmt00 -= pmb; mblks--; Y += mb * Yinc; }
483 if( mblks <= 0 )
return( npq );
490 lcmt = lcmt00; mblkd = mblks; Yptrd = Y;
492 while( mblkd && ( lcmt >= ilow ) )
497 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
500 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
501 tmp2 =
MIN( tmp1, inbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
502 TYPE->Fswap( &tmp2, X, &INCX, Yptrd+lcmt*Yinc, &INCY );
506 tmp1 = inbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
507 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
508 TYPE->Fswap( &tmp2, X-lcmt*Xinc, &INCX, Yptrd, &INCY );
510 if( ( kb -= tmp2 ) == 0 )
return( npq );
514 lcmt -= pmb; mblkd--; Yptrd += mbloc * Yinc;
519 lcmt00 += low - ilow + qnb; nblks--; X += inbloc * Xinc;
527 lcmt00 += low - ilow + qnb; nblks--; X += inbloc * Xinc;
533 while( nblks && ( lcmt00 < low ) )
534 { lcmt00 += qnb; nblks--; X += nb * Xinc; }
538 if( nblks <= 0 )
return( npq );
544 lcmt = lcmt00; nblkd = nblks; Xptrd = X;
546 while( nblkd && ( lcmt <= iupp ) )
551 nbloc = ( ( nblkd == 1 ) ? lnbloc : nb );
554 tmp1 = imbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
555 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
556 TYPE->Fswap( &tmp2, Xptrd, &INCX, Y+lcmt*Yinc, &INCY );
560 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
561 tmp2 =
MIN( tmp1, imbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
562 TYPE->Fswap( &tmp2, Xptrd-lcmt*Xinc, &INCX, Y, &INCY );
564 if( ( kb -= tmp2 ) == 0 )
return( npq );
568 lcmt += qnb; nblkd--; Xptrd += nbloc * Xinc;
573 lcmt00 -= iupp - upp + pmb; mblks--; Y += imbloc * Yinc;
584 if( ( lcmt00 < low ) || ( lcmt00 > upp ) )
586 while( mblks && nblks )
588 while( mblks && ( lcmt00 > upp ) )
589 { lcmt00 -= pmb; mblks--; Y += mb * Yinc; }
590 if( lcmt00 >= low )
break;
591 while( nblks && ( lcmt00 < low ) )
592 { lcmt00 += qnb; nblks--; X += nb * Xinc; }
593 if( lcmt00 <= upp )
break;
596 if( !( mblks ) || !( nblks ) )
return( npq );
602 nbloc = ( ( nblks == 1 ) ? lnbloc : nb );
603 lcmt = lcmt00; mblkd = mblks; Yptrd = Y;
607 while( mblkd && lcmt >= low )
609 mbloc = ( ( mblkd == 1 ) ? lmbloc : mb );
612 tmp1 = mbloc - lcmt; tmp1 =
MAX( 0, tmp1 );
613 tmp2 =
MIN( tmp1, nbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
614 TYPE->Fswap( &tmp2, X, &INCX, Yptrd+lcmt*Yinc, &INCY );
618 tmp1 = nbloc + lcmt; tmp1 =
MAX( 0, tmp1 );
619 tmp2 =
MIN( tmp1, mbloc ); npq += ( tmp2 =
MIN( tmp2, kb ) );
620 TYPE->Fswap( &tmp2, X-lcmt*Xinc, &INCX, Yptrd, &INCY );
622 if( ( kb -= tmp2 ) == 0 )
return( npq );
626 lcmt -= pmb; mblkd--; Yptrd += mbloc * Yinc;
631 lcmt00 += qnb; nblks--; X += nbloc * Xinc;
635 }
while( nblks > 0 );