14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
51 int GoEast, GoSouth, Pmb, Qnb, gcdb, ilow, imbloc, inbloc, iupp,
52 kmax, kmin, k1, k2, k3, lcmb, lcmp, lcmq, lcmt, lcmt00,
53 lmbloc, lnbloc, low, l1, l2, l3, m, mb, mblkd, mblks, mbloc,
54 n, nb, nblkd, nblks, nbloc, nlcmblks, npcol, npq=0, nprow,
60 m = VM->
mp; n = VM->nq;
64 if( ( m == 0 ) || ( n == 0 ) )
return( 0 );
69 mb = VM->mb; nprow = VM->nprow;
70 nb = VM->nb; npcol = VM->npcol;
72 if( ( ( VM->offd == 0 ) && ( VM->imb1 == VM->inb1 ) && ( mb == nb ) &&
73 ( nprow == npcol ) ) || ( ( nprow == 1 ) && ( npcol == 1 ) ) )
75 if( VM->prow == VM->pcol )
return(
MIN( m, n ) );
82 mblks = VM->mblks; imbloc = VM->imbloc; lmbloc = VM->lmbloc;
83 iupp = VM->iupp; upp = VM->upp;
84 nblks = VM->nblks; inbloc = VM->inbloc; lnbloc = VM->lnbloc;
85 ilow = VM->ilow; low = VM->low;
94 GoSouth = ( lcmt00 > iupp );
95 GoEast = ( lcmt00 < ilow );
99 if( !( GoSouth ) && !( GoEast ) )
104 npq += ( lcmt00 >= 0 ?
105 ( ( tmp2 = ( tmp1 = imbloc - lcmt00 ) > 0 ? tmp1 : 0 ) < inbloc ?
107 ( ( tmp2 = ( tmp1 = inbloc + lcmt00 ) > 0 ? tmp1 : 0 ) > imbloc ?
114 GoSouth = !( GoEast = ( ( lcmt00 - ( iupp - upp + Pmb ) ) < ilow ) );
122 lcmt00 -= iupp - upp + Pmb; mblks--;
127 while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= Pmb; mblks--; }
131 if( mblks <= 0 )
return( npq );
138 lcmt = lcmt00; mblkd = mblks; mbloc = mb;
140 while( mblkd && ( lcmt >= ilow ) )
145 if( mblkd == 1 ) mbloc = lmbloc;
147 ( ( tmp2 = ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ) < inbloc ?
149 ( ( tmp2 = ( tmp1 = inbloc + lcmt ) > 0 ? tmp1 : 0 ) > mbloc ?
154 lcmt00 = lcmt; lcmt -= Pmb; mblks = mblkd--;
159 lcmt00 += low - ilow + Qnb; nblks--;
166 lcmt00 += low - ilow + Qnb; nblks--;
171 while( nblks && ( lcmt00 < low ) ) { lcmt00 += Qnb; nblks--; }
175 if( nblks <= 0 )
return( npq );
181 lcmt = lcmt00; nblkd = nblks; nbloc = nb;
183 while( nblkd && ( lcmt <= iupp ) )
188 if( nblkd == 1 ) nbloc = lnbloc;
190 ( ( tmp2 = ( tmp1 = imbloc - lcmt ) > 0 ? tmp1 : 0 ) < nbloc ?
192 ( ( tmp2 = ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ) > imbloc ?
197 lcmt00 = lcmt; lcmt += Qnb; nblks = nblkd--;
202 lcmt00 -= iupp - upp + Pmb; mblks--;
208 if( lcmt00 < low || lcmt00 > upp )
210 while( mblks && nblks )
212 while( mblks && ( lcmt00 > upp ) ) { lcmt00 -= Pmb; mblks--; }
213 if( lcmt00 >= low )
break;
214 while( nblks && ( lcmt00 < low ) ) { lcmt00 += Qnb; nblks--; }
215 if( lcmt00 <= upp )
break;
218 if( !( mblks ) || !( nblks ) )
return( npq );
222 gcdb = ( Pmb * Qnb ) / lcmb;
226 kmin = - ( lcmb / gcdb );
227 kmax = ( lcmb - Qnb ) / gcdb;
228 tmp1 = ( mblks - 1 ) / ( lcmp = lcmb / Pmb );
229 tmp2 = nblks / ( lcmq = lcmb / Qnb );
231 else if( lcmt00 < 0 )
233 kmin = - ( ( lcmb - Pmb ) / gcdb );
235 tmp1 = mblks / ( lcmp = lcmb / Pmb );
236 tmp2 = ( nblks - 1 ) / ( lcmq = lcmb / Qnb );
240 kmin = - ( ( lcmb - Pmb ) / gcdb );
241 kmax = ( lcmb - Qnb ) / gcdb;
242 tmp1 = mblks / ( lcmp = lcmb / Pmb );
243 tmp2 = nblks / ( lcmq = lcmb / Qnb );
248 nlcmblks =
MIN( tmp1, tmp2 );
249 if( nlcmblks ) nlcmblks--;
257 k1 = -lcmt00; k1 =
ICEIL( k1, gcdb ); l1 = k1 - 1;
258 l1 =
MIN( l1, kmax ); k1 =
MAX( k1, kmin );
260 k3 = upp - lcmt00; k3 =
FLOOR( k3, gcdb ); k3 =
MIN( k3, kmax );
261 l3 = low - lcmt00; l3 =
ICEIL( l3, gcdb ); l3 =
MAX( l3, kmin );
265 k2 = mb - nb - lcmt00;
266 k2 =
ICEIL( k2, gcdb );
274 tmp2 = tmp1 * ( mb - lcmt00 );
275 tmp1 *= ( k3 + k1 )*gcdb;
276 tmp2 += ( tmp1 > 0 ? -( tmp1 / 2 ) : (-tmp1) / 2 );
283 tmp2 = ( k3 - k1 + 1 ) * nb;
291 tmp2 = ( k2 - k1 ) * nb + tmp1 * ( mb - lcmt00 );
292 tmp1 *= ( k3 + k2 ) * gcdb;
293 tmp2 += ( tmp1 > 0 ? -( tmp1 / 2 ) : (-tmp1) / 2 );
299 l2 = mb - nb - lcmt00;
300 l2 =
FLOOR( l2, gcdb );
308 tmp2 += tmp1 * ( nb + lcmt00 );
309 tmp1 *= ( l3 + l1 ) * gcdb;
310 tmp2 += ( tmp1 > 0 ? ( tmp1 / 2 ) : -( (-tmp1) / 2 ) );
317 tmp2 += ( l1 - l3 + 1 ) * mb;
325 tmp2 += ( l1 - l2 ) * mb + tmp1 * ( nb + lcmt00 );
326 tmp1 *= ( l3 + l2 ) * gcdb;
327 tmp2 += ( tmp1 > 0 ? ( tmp1 / 2 ) : -( (-tmp1) / 2 ) );
330 npq += nlcmblks * tmp2;
332 mblks -= nlcmblks * lcmp;
333 nblks -= nlcmblks * lcmq;
346 if( nblks == 1 ) nbloc = lnbloc;
347 while( mblks && lcmt00 > upp ) { lcmt00 -= Pmb; mblks--; }
349 if( mblks <= 0 )
return( npq );
351 lcmt = lcmt00; mblkd = mblks; mbloc = mb;
353 while( mblkd && ( lcmt >= low ) )
358 if( mblkd == 1 ) mbloc = lmbloc;
360 ( ( tmp2 = ( tmp1 = mbloc - lcmt ) > 0 ? tmp1 : 0 ) < nbloc ?
362 ( ( tmp2 = ( tmp1 = nbloc + lcmt ) > 0 ? tmp1 : 0 ) > mbloc ?
367 lcmt00 = lcmt; lcmt -= Pmb; mblks = mblkd--;
372 lcmt00 += Qnb; nblks--;