ScaLAPACK 2.1 - Scalable Linear Algebra PACKage
PB_Cnumroc.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18
#ifdef __STDC__
int PB_Cnumroc( int N, int I, int INB, int NB, int PROC, int SRCPROC,
                int NPROCS )
#else
int PB_Cnumroc( N, I, INB, NB, PROC, SRCPROC, NPROCS )
/*
*  .. Scalar Arguments ..
*/
   int            I, INB, N, NB, NPROCS, PROC, SRCPROC;
#endif
{
/*
*  Purpose
*  =======
*
*  PB_Cnumroc  computes how many of the N rows/columns that are dealt out
*  starting at global index I end up being stored locally by the process
*  of coordinate PROC.
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          Number of rows/columns being dealt out.  N must be at least
*          zero.
*
*  I       (global input) INTEGER
*          Global index of the first matrix entry being dealt out.  I
*          must be at least zero.
*
*  INB     (global input) INTEGER
*          Size of the first block of the global matrix.  INB must be
*          at least one.
*
*  NB      (global input) INTEGER
*          Size of the blocks used to partition the matrix.  NB must be
*          at least one.
*
*  PROC    (local input) INTEGER
*          Coordinate of the process whose local share is wanted.  PROC
*          must be at least zero and strictly less than NPROCS.
*
*  SRCPROC (global input) INTEGER
*          Coordinate of the process holding the first row or column of
*          the matrix.  SRCPROC = -1 means the data is replicated rather
*          than distributed;  otherwise  SRCPROC  must be at least zero
*          and strictly less than NPROCS.
*
*  NPROCS  (global input) INTEGER
*          Total number of process rows or columns over which the matrix
*          is distributed.  NPROCS must be at least one.
*
*  Returns
*  =======
*
*  The local number of rows/columns owned by process PROC.
*
*  ---------------------------------------------------------------------
*/
/*
*  .. Local Scalars ..
*/
   int            dist, first, nblk, nloc, skip;
/* ..
*  .. Executable Statements ..
*
*/
/*
*  Replicated data, or a single process in this grid dimension: every
*  process holds all N entries.
*/
   if( SRCPROC == -1 ) return( N );
   if( NPROCS  ==  1 ) return( N );
/*
*  Re-base the problem at global index I: "first" becomes the number of
*  entries left in the block that contains I, and SRCPROC becomes the
*  coordinate of the process owning that block.
*/
   first = INB - I;
   if( first <= 0 )
   {
/*
*  I lies beyond the first block: skip over the blocks preceding it,
*  wrapping the owner coordinate around the NPROCS processes.
*/
      skip    = ( -first ) / NB + 1;
      SRCPROC = ( SRCPROC + skip ) % NPROCS;
      first  += skip * NB;
   }
/*
*  From here on the problem is equivalent to N, I = 0, INB = first.
*  When everything fits in the first block, its owner gets all N entries
*  and every other process gets none.
*/
   if( N <= first )
   {
      if( PROC == SRCPROC ) return( N );
      return( 0 );
   }
/*
*  nblk counts the first block plus the full NB-sized blocks following
*  it; a trailing block of N - first - ( nblk - 1 ) * NB entries, which
*  may be empty, comes after them.
*/
   nblk = ( N - first ) / NB + 1;

   if( PROC == SRCPROC )
   {
/*
*  Source process (distance zero from itself).  With fewer counted
*  blocks than processes it only holds the first block.
*/
      if( nblk < NPROCS ) return( first );

      nloc = nblk / NPROCS;
/*
*  NPROCS does not divide nblk:  the source owns the first block and
*  nloc full blocks, and the trailing block belongs to someone else.
*/
      if( nblk % NPROCS != 0 ) return( first + nloc * NB );
/*
*  NPROCS divides nblk:  the trailing block lands on the source, hence
*  Np = first + ( nloc - 1 ) * NB + LNB = N + ( nloc - nblk ) * NB.
*/
      return( N + ( nloc - nblk ) * NB );
   }
/*
*  Not the source process.  dist is the cyclic distance from the source,
*  so that the source itself would be at distance zero.
*/
   dist = PROC - SRCPROC;
   if( dist < 0 ) dist += NPROCS;

   if( nblk < NPROCS )
   {
/*
*  Fewer counted blocks than processes: processes closer than nblk hold
*  one full block, processes farther away hold nothing, and the process
*  exactly at distance nblk holds the trailing block.
*/
      if( dist < nblk ) return( NB );
      if( dist > nblk ) return( 0  );
      return( N - first + NB * ( 1 - nblk ) );
   }
/*
*  General case: measure the distance relative to the process at which
*  the last round of the cyclic deal stops.
*/
   nloc  = nblk / NPROCS;
   dist -= nblk % NPROCS;
/*
*  Before that process:  nloc + 1  full blocks.   After it:  nloc  full
*  blocks.  Exactly on it: nloc full blocks plus the trailing block,
*  i.e. Np = N - first + ( nloc - nblk + 1 ) * NB.
*/
   if( dist < 0 ) return( ( nloc + 1 ) * NB );
   if( dist > 0 ) return( nloc * NB );
   return( N - first + NB * ( nloc - nblk + 1 ) );
/*
*  End of PB_Cnumroc
*/
}
PB_Cnumroc
int PB_Cnumroc(int N, int I, int INB, int NB, int PROC, int SRCPROC, int NPROCS)
Definition: PB_Cnumroc.c:23