ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cinfog2l.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18 
19 #ifdef __STDC__
20 void PB_Cinfog2l( int I, int J, int * DESC, int NPROW, int NPCOL,
21  int MYROW, int MYCOL, int * II, int * JJ,
22  int * PROW, int * PCOL )
23 #else
24 void PB_Cinfog2l( I, J, DESC, NPROW, NPCOL, MYROW, MYCOL, II, JJ,
25  PROW, PCOL )
26  int I, * II, J, * JJ, MYCOL, MYROW, NPCOL, NPROW, * PCOL,
27  * PROW;
28 /*
29 * .. Scalar Arguments ..
30 */
31 /*
32 * .. Array Arguments ..
33 */
34  int * DESC;
35 #endif
36 {
37 /*
38 * Purpose
39 * =======
40 *
41 * PB_Cinfog2l computes the starting local index II, JJ corresponding to
42 * the submatrix starting globally at the entry pointed by I, J. This
43 * routine returns the coordinates in the grid of the process owning the
44 * matrix entry of global indexes I, J, namely PROW and PCOL.
45 *
46 * Notes
47 * =====
48 *
49 * A description vector is associated with each 2D block-cyclicly dis-
50 * tributed matrix. This vector stores the information required to
51 * establish the mapping between a matrix entry and its corresponding
52 * process and memory location.
53 *
54 * In the following comments, the character _ should be read as
55 * "of the distributed matrix". Let A be a generic term for any 2D
56 * block cyclicly distributed matrix. Its description vector is DESC_A:
57 *
58 * NOTATION STORED IN EXPLANATION
59 * ---------------- --------------- ------------------------------------
60 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
61 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
62 * the NPROW x NPCOL BLACS process grid
63 * A is distributed over. The context
64 * itself is global, but the handle
65 * (the integer value) may vary.
66 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
67 * ted matrix A, M_A >= 0.
68 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
69 * buted matrix A, N_A >= 0.
70 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
71 * block of the matrix A, IMB_A > 0.
72 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
73 * left block of the matrix A,
74 * INB_A > 0.
75 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
76 * bute the last M_A-IMB_A rows of A,
77 * MB_A > 0.
78 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
79 * bute the last N_A-INB_A columns of
80 * A, NB_A > 0.
81 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
82 * row of the matrix A is distributed,
83 * NPROW > RSRC_A >= 0.
84 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
85 * first column of A is distributed.
86 * NPCOL > CSRC_A >= 0.
87 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
88 * array storing the local blocks of
89 * the distributed matrix A,
90 * IF( Lc( 1, N_A ) > 0 )
91 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
92 * ELSE
93 * LLD_A >= 1.
94 *
95 * Let K be the number of rows of a matrix A starting at the global in-
96 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
97 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
98 * receive if these K rows were distributed over NPROW processes. If K
99 * is the number of columns of a matrix A starting at the global index
100 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
101 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
102 * these K columns were distributed over NPCOL processes.
103 *
104 * The values of Lr() and Lc() may be determined via a call to the func-
105 * tion PB_Cnumroc:
106 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
107 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
108 *
109 * Arguments
110 * =========
111 *
112 * I (global input) INTEGER
113 * On entry, I specifies the global starting row index of the
114 * submatrix. I must at least zero.
115 *
116 * J (global input) INTEGER
117 * On entry, J specifies the global starting column index of
118 * the submatrix. J must at least zero.
119 *
120 * DESC (global and local input) INTEGER array
121 * On entry, DESC is an integer array of dimension DLEN_. This
122 * is the array descriptor of the underlying matrix.
123 *
124 * NPROW (global input) INTEGER
125 * On entry, NPROW specifies the total number of process rows
126 * over which the matrix is distributed. NPROW must be at least
127 * one.
128 *
129 * NPCOL (global input) INTEGER
130 * On entry, NPCOL specifies the total number of process columns
131 * over which the matrix is distributed. NPCOL must be at least
132 * one.
133 *
134 * MYROW (local input) INTEGER
135 * On entry, MYROW specifies the row coordinate of the process
136 * whose local index II is determined. MYROW must be at least
137 * zero and strictly less than NPROW.
138 *
139 * MYCOL (local input) INTEGER
140 * On entry, MYCOL specifies the column coordinate of the pro-
141 * cess whose local index JJ is determined. MYCOL must be at
142 * least zero and strictly less than NPCOL.
143 *
144 * II (local output) INTEGER
145 * On exit, II specifies the local starting row index of the
146 * submatrix. On exit, II is at least zero.
147 *
148 * JJ (local output) INTEGER
149 * On exit, JJ specifies the local starting column index of the
150 * submatrix. On exit, JJ is at least zero.
151 *
152 * PROW (global output) INTEGER
153 * On exit, PROW specifies the row coordinate of the process
154 * that possesses the first row of the submatrix. On exit, PROW
155 * is -1 if DESC( RSRC_ ) is -1 on input, and, at least zero
156 * and strictly less than NPROW otherwise.
157 *
158 * PCOL (global output) INTEGER
159 * On exit, PCOL specifies the column coordinate of the process
160 * that possesses the first column of the submatrix. On exit,
161 * PCOL is -1 if DESC( CSRC_ ) is -1 on input, and, at least
162 * zero and strictly less than NPCOL otherwise.
163 *
164 * -- Written on April 1, 1998 by
165 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
166 *
167 * ---------------------------------------------------------------------
168 */
169 /*
170 * .. Local Scalars ..
171 */
172  int ilocblk, imb, inb, mb, mydist, nb, nblocks, csrc, rsrc;
173 /* ..
174 * .. Executable Statements ..
175 *
176 */
177 /*
178 * Retrieve the row distribution parameters
179 */
180  imb = DESC[IMB_ ];
181  *PROW = DESC[RSRC_];
182 
183  if( ( *PROW == -1 ) || ( NPROW == 1 ) )
184  {
185 /*
186 * The data is not distributed, or there is just one process row in the grid.
187 */
188  *II = I;
189  }
190  else if( I < imb )
191  {
192 /*
193 * I refers to an entry in the first block of rows
194 */
195  *II = ( MYROW == *PROW ? I : 0 );
196  }
197  else
198  {
199  mb = DESC[MB_];
200  rsrc = *PROW;
201 /*
202 * The discussion goes as follows: compute my distance from the source process
203 * so that within this process coordinate system, the source process is the
204 * process such that mydist = 0, or equivalently MYROW == rsrc.
205 *
206 * Find out the global coordinate of the block I belongs to (nblocks), as well
207 * as the minimum local number of blocks that every process has.
208 *
209 * when mydist < nblocks - ilocblk * NPROCS, I own ilocblk + 1 full blocks,
210 * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks,
211 * when mydist = nblocks - ilocblk * NPROCS, I own ilocblk full blocks
212 * but not I, or I own ilocblk + 1 blocks and the entry I refers to.
213 */
214  if( MYROW == rsrc )
215  {
216 /*
217 * I refers to an entry that is not in the first block, find out which process
218 * has it.
219 */
220  nblocks = ( I - imb ) / mb + 1;
221  *PROW += nblocks;
222  *PROW -= ( *PROW / NPROW ) * NPROW;
223 /*
224 * Since mydist = 0 and nblocks - ilocblk * NPROW >= 0, there are only three
225 * possible cases:
226 *
227 * 1) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I don't own I, in
228 * which case II = IMB + ( ilocblk - 1 ) * MB. Note that this case cannot
229 * happen when ilocblk is zero, since nblocks is at least one.
230 *
231 * 2) When 0 = mydist = nblocks - ilocblk * NPROW = 0 and I own I, in which
232 * case I and II can respectively be written as IMB + (nblocks-1)*NB + IL
233 * and IMB + (ilocblk-1) * MB + IL. That is II = I + (ilocblk-nblocks)*MB.
234 * Note that this case cannot happen when ilocblk is zero, since nblocks
235 * is at least one.
236 *
237 * 3) mydist = 0 < nblocks - ilocblk * NPROW, the source process owns
238 * ilocblk+1 full blocks, and therefore II = IMB + ilocblk * MB. Note
239 * that when ilocblk is zero, II is just IMB.
240 */
241  if( nblocks < NPROW )
242  {
243  *II = imb;
244  }
245  else
246  {
247  ilocblk = nblocks / NPROW;
248  if( ilocblk * NPROW >= nblocks )
249  {
250  *II = ( ( MYROW == *PROW ) ? I + ( ilocblk - nblocks ) * mb :
251  imb + ( ilocblk - 1 ) * mb );
252  }
253  else
254  {
255  *II = imb + ilocblk * mb;
256  }
257  }
258  }
259  else
260  {
261 /*
262 * I refers to an entry that is not in the first block, find out which process
263 * has it.
264 */
265  nblocks = ( I -= imb ) / mb + 1;
266  *PROW += nblocks;
267  *PROW -= ( *PROW / NPROW ) * NPROW;
268 /*
269 * Compute my distance from the source process so that within this process
270 * coordinate system, the source process is the process such that mydist=0.
271 */
272  if( ( mydist = MYROW - rsrc ) < 0 ) mydist += NPROW;
273 /*
274 * When mydist < nblocks - ilocblk * NPROW, I own ilocblk + 1 full blocks of
275 * size MB since I am not the source process, i.e. II = ( ilocblk + 1 ) * MB.
276 * When mydist >= nblocks - ilocblk * NPROW and I don't own I, I own ilocblk
277 * full blocks of size MB, i.e. II = ilocblk * MB, otherwise I own ilocblk
278 * blocks and I, in which case I can be written as IMB + (nblocks-1)*MB + IL
279 * and II = ilocblk*MB + IL = I - IMB + ( ilocblk - nblocks + 1 )*MB.
280 */
281  if( nblocks < NPROW )
282  {
283  mydist -= nblocks;
284  *II = ( ( mydist < 0 ) ? mb :
285  ( ( MYROW == *PROW ) ? I + ( 1 - nblocks ) * mb : 0 ) );
286  }
287  else
288  {
289  ilocblk = nblocks / NPROW;
290  mydist -= nblocks - ilocblk * NPROW;
291  *II = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * mb :
292  ( ( MYROW == *PROW ) ?
293  ( ilocblk - nblocks + 1 ) * mb + I : ilocblk * mb ) );
294  }
295  }
296  }
297 /*
298 * Idem for the columns
299 */
300  inb = DESC[INB_ ];
301  *PCOL = DESC[CSRC_];
302 
303  if( ( *PCOL == -1 ) || ( NPCOL == 1 ) )
304  {
305  *JJ = J;
306  }
307  else if( J < inb )
308  {
309  *JJ = ( MYCOL == *PCOL ? J : 0 );
310  }
311  else
312  {
313  nb = DESC[NB_];
314  csrc = *PCOL;
315 
316  if( MYCOL == csrc )
317  {
318  nblocks = ( J - inb ) / nb + 1;
319  *PCOL += nblocks;
320  *PCOL -= ( *PCOL / NPCOL ) * NPCOL;
321 
322  if( nblocks < NPCOL )
323  {
324  *JJ = inb;
325  }
326  else
327  {
328  ilocblk = nblocks / NPCOL;
329  if( ilocblk * NPCOL >= nblocks )
330  {
331  *JJ = ( ( MYCOL == *PCOL ) ? J + ( ilocblk - nblocks ) * nb :
332  inb + ( ilocblk - 1 ) * nb );
333  }
334  else
335  {
336  *JJ = inb + ilocblk * nb;
337  }
338  }
339  }
340  else
341  {
342  nblocks = ( J -= inb ) / nb + 1;
343  *PCOL += nblocks;
344  *PCOL -= ( *PCOL / NPCOL ) * NPCOL;
345 
346  if( ( mydist = MYCOL - csrc ) < 0 ) mydist += NPCOL;
347 
348  if( nblocks < NPCOL )
349  {
350  mydist -= nblocks;
351  *JJ = ( ( mydist < 0 ) ? nb : ( ( MYCOL == *PCOL ) ?
352  J + ( 1 - nblocks )*nb : 0 ) );
353  }
354  else
355  {
356  ilocblk = nblocks / NPCOL;
357  mydist -= nblocks - ilocblk * NPCOL;
358  *JJ = ( ( mydist < 0 ) ? ( ilocblk + 1 ) * nb :
359  ( ( MYCOL == *PCOL ) ?
360  ( ilocblk - nblocks + 1 ) * nb + J : ilocblk * nb ) );
361  }
362  }
363  }
364 /*
365 * End of PB_Cinfog2l
366 */
367 }
MB_
#define MB_
Definition: PBtools.h:43
NB_
#define NB_
Definition: PBtools.h:44
CSRC_
#define CSRC_
Definition: PBtools.h:46
IMB_
#define IMB_
Definition: PBtools.h:41
RSRC_
#define RSRC_
Definition: PBtools.h:45
PB_Cinfog2l
void PB_Cinfog2l(int I, int J, int *DESC, int NPROW, int NPCOL, int MYROW, int MYCOL, int *II, int *JJ, int *PROW, int *PCOL)
Definition: PB_Cinfog2l.c:24
INB_
#define INB_
Definition: PBtools.h:42