 ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
PB_Cnnxtroc.c
Go to the documentation of this file.
/* ---------------------------------------------------------------------
*
*  -- PBLAS auxiliary routine (version 2.0) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 1, 1998
*
*  ---------------------------------------------------------------------
*/
/*
*  Include files
*/
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18
#ifdef __STDC__
int PB_Cnnxtroc( int N, int I, int INB, int NB, int PROC, int SRCPROC,
                 int NPROCS )
#else
int PB_Cnnxtroc( N, I, INB, NB, PROC, SRCPROC, NPROCS )
/*
*  .. Scalar Arguments ..
*/
   int            I, INB, N, NB, NPROCS, PROC, SRCPROC;
#endif
{
/*
*  Purpose
*  =======
*
*  PB_Cnnxtroc  computes the number of rows or columns of a submatrix
*  that are owned by the processes located after PROC, where processes
*  are ordered by their cyclic distance from the process owning the row
*  or column of global index I (called the source process below).  The
*  submatrix consists of N rows or columns starting at global index I.
*  For example, if the source process is 0 and PROC = 1, the value
*  returned is the number of rows or columns owned by processes
*  2, 3, ..., NPROCS-1.
*
*  When the same N, I, INB, NB, SRCPROC and NPROCS are given to
*  PB_Cnpreroc, PB_Cnumroc and PB_Cnnxtroc, yielding npre, np and nnxt
*  respectively, then npre + np + nnxt = N in every process PROC.
*
*  Arguments
*  =========
*
*  N       (global input) INTEGER
*          On entry,  N  specifies the number of rows/columns being
*          dealt out. N must be at least zero.
*
*  I       (global input) INTEGER
*          On entry,  I  specifies the global index of the first row or
*          column of the submatrix. I must be at least zero.
*
*  INB     (global input) INTEGER
*          On entry,  INB  specifies the size of the first block of the
*          global matrix distribution. INB must be at least one.
*
*  NB      (global input) INTEGER
*          On entry,  NB  specifies the size of the blocks used to
*          partition the matrix. NB must be at least one; it is used as
*          a divisor and is not checked here.
*
*  PROC    (local input) INTEGER
*          On entry, PROC specifies the coordinate of the process whose
*          followers are counted. PROC must be at least zero and
*          strictly less than NPROCS.
*
*  SRCPROC (global input) INTEGER
*          On entry,  SRCPROC  specifies the coordinate of the process
*          that possesses the first row or column of the matrix. When
*          SRCPROC = -1, the data is not distributed but replicated and
*          zero is returned; otherwise SRCPROC must be at least zero
*          and strictly less than NPROCS.
*
*  NPROCS  (global input) INTEGER
*          On entry,  NPROCS  specifies the total number of process rows
*          or columns over which the matrix is distributed. NPROCS must
*          be at least one; when it is one, zero is returned.
*
*  Return value
*  ============
*
*  The number of rows or columns, among the N considered, owned by the
*  processes that come after PROC.
*
*  -- Written on April 1, 1998 by
*     Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
*
*  ---------------------------------------------------------------------
*/
/*
*  .. Local Scalars ..
*/
   int            ilocblk, mydist, nblocks;
/* ..
*  .. Executable Statements ..
*
*/
/*
*  Replicated data, or a single process in this grid dimension: no process
*  can follow PROC.
*/
   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) )
   {
      return( 0 );
   }
/*
*  Re-base the distribution at global index I: INB becomes the number of
*  entries left in the block containing I, and SRCPROC becomes the process
*  owning that block.
*/
   INB -= I;
   if( INB <= 0 )
   {
/*
*  I lies beyond the first block. Skip nblocks block boundaries, advance the
*  owner cyclically by as many processes, and compute what remains of the
*  block holding I (a value in 1..NB).
*/
      nblocks = ( -INB ) / NB + 1;
      SRCPROC = ( SRCPROC + nblocks ) % NPROCS;
      INB    += nblocks * NB;
   }
/*
*  From here on the problem is equivalent to N, I=0, INB, NB, SRCPROC, NPROCS.
*  If everything fits in the source's first block, nobody else owns anything.
*/
   if( N <= INB )
   {
      return( 0 );
   }
/*
*  nblocks counts the complete blocks among the N entries, the leading block
*  of size INB included. Hence N = INB + ( nblocks - 1 ) * NB + LNB, where
*  0 <= LNB < NB is the size of the trailing (possibly empty) partial block.
*  That trailing block belongs to the process at distance nblocks mod NPROCS
*  from the source. ilocblk = nblocks / NPROCS is the number of complete
*  rounds of blocks dealt to all the processes.
*/
   nblocks = ( N - INB ) / NB + 1;

   if( PROC == SRCPROC )
   {
/*
*  I am the source process (distance zero).
*
*  Fewer complete blocks than processes: I only own the leading block.
*/
      if( nblocks < NPROCS )
      {
         return( N - INB );
      }

      ilocblk = nblocks / NPROCS;

      if( nblocks - ilocblk * NPROCS )
      {
/*
*  NPROCS does not divide nblocks: I own ilocblk + 1 complete blocks, that
*  is INB + ilocblk * NB entries, and the trailing partial block is not mine.
*/
         return( N - INB - ilocblk * NB );
      }
/*
*  NPROCS divides nblocks: I own ilocblk complete blocks plus the trailing
*  partial block, i.e. INB + ( ilocblk - 1 ) * NB + LNB entries, which leaves
*  ( nblocks - ilocblk ) * NB entries to the other processes.
*/
      return( ( nblocks - ilocblk ) * NB );
   }
   else
   {
/*
*  I am not the source process: compute my cyclic distance from it.
*/
      mydist = PROC - SRCPROC;
      if( mydist < 0 )
      {
         mydist += NPROCS;
      }
/*
*  The process farthest from the source has no follower.
*/
      if( mydist == NPROCS - 1 )
      {
         return( 0 );
      }
/*
*  Fewer complete blocks than processes: when mydist < nblocks, the processes
*  at distance 0 .. mydist hold INB + mydist * NB entries altogether and the
*  rest follows me; otherwise no process after me receives anything.
*/
      if( nblocks < NPROCS )
      {
         return( ( mydist < nblocks ) ? N - mydist * NB - INB : 0 );
      }

      ilocblk = nblocks / NPROCS;

      if( mydist >= nblocks - ilocblk * NPROCS )
      {
/*
*  Every process after me owns exactly ilocblk complete blocks of size NB, and
*  the trailing partial block is owned by me or by a process preceding me.
*/
         return( ( NPROCS - 1 - mydist ) * ilocblk * NB );
      }
/*
*  The source and the mydist processes up to and including me each own
*  ilocblk + 1 complete blocks: INB + ilocblk * NB entries for the source plus
*  mydist * ( ilocblk + 1 ) * NB entries for the others. The remainder of the
*  N entries follows me.
*/
      return( N - INB - ( ilocblk * mydist + ilocblk + mydist ) * NB );
   }
/*
*  End of PB_Cnnxtroc
*/
}
PB_Cnnxtroc
int PB_Cnnxtroc(int N, int I, int INB, int NB, int PROC, int SRCPROC, int NPROCS)
Definition: PB_Cnnxtroc.c:23