ScaLAPACK 2.1  2.1 ScaLAPACK: Scalable Linear Algebra PACKage
PB_CpswapNN.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- PBLAS auxiliary routine (version 2.0) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * April 1, 1998
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "../pblas.h"
14 #include "../PBpblas.h"
15 #include "../PBtools.h"
16 #include "../PBblacs.h"
17 #include "../PBblas.h"
18
19 #ifdef __STDC__
20 void PB_CpswapNN( PBTYP_T * TYPE, int N,
21  char * X, int IX, int JX, int * DESCX, int INCX,
22  char * Y, int IY, int JY, int * DESCY, int INCY )
23 #else
24 void PB_CpswapNN( TYPE, N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY )
25 /*
26 * .. Scalar Arguments ..
27 */
28  int INCX, INCY, IX, IY, JX, JY, N;
29  PBTYP_T * TYPE;
30 /*
31 * .. Array Arguments ..
32 */
33  int * DESCX, * DESCY;
34  char * X, * Y;
35 #endif
36 {
37 /*
38 * Purpose
39 * =======
40 *
41 * PB_CpswapNN swaps two subvectors,
42 *
43 * sub( Y ) := sub( X ) and sub( X ) := sub( Y )
44 *
45 * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
46 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
47 *
48 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
49 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
50 *
51 * Both subvectors are assumed to be not distributed.
52 *
53 * Notes
54 * =====
55 *
56 * A description vector is associated with each 2D block-cyclicly dis-
57 * tributed matrix. This vector stores the information required to
58 * establish the mapping between a matrix entry and its corresponding
59 * process and memory location.
60 *
61 * In the following comments, the character _ should be read as
62 * "of the distributed matrix". Let A be a generic term for any 2D
63 * block cyclicly distributed matrix. Its description vector is DESC_A:
64 *
65 * NOTATION STORED IN EXPLANATION
66 * ---------------- --------------- ------------------------------------
67 * DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
68 * CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
69 * the NPROW x NPCOL BLACS process grid
70 * A is distributed over. The context
71 * itself is global, but the handle
72 * (the integer value) may vary.
73 * M_A (global) DESCA[ M_ ] The number of rows in the distribu-
74 * ted matrix A, M_A >= 0.
75 * N_A (global) DESCA[ N_ ] The number of columns in the distri-
76 * buted matrix A, N_A >= 0.
77 * IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
78 * block of the matrix A, IMB_A > 0.
79 * INB_A (global) DESCA[ INB_ ] The number of columns of the upper
80 * left block of the matrix A,
81 * INB_A > 0.
82 * MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
83 * bute the last M_A-IMB_A rows of A,
84 * MB_A > 0.
85 * NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
86 * bute the last N_A-INB_A columns of
87 * A, NB_A > 0.
88 * RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
89 * row of the matrix A is distributed,
90 * NPROW > RSRC_A >= 0.
91 * CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
92 * first column of A is distributed.
93 * NPCOL > CSRC_A >= 0.
94 * LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
95 * array storing the local blocks of
96 * the distributed matrix A,
97 * IF( Lc( 1, N_A ) > 0 )
98 * LLD_A >= MAX( 1, Lr( 1, M_A ) )
99 * ELSE
100 * LLD_A >= 1.
101 *
102 * Let K be the number of rows of a matrix A starting at the global in-
103 * dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
104 * that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
105 * receive if these K rows were distributed over NPROW processes. If K
106 * is the number of columns of a matrix A starting at the global index
107 * JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
108 * lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
109 * these K columns were distributed over NPCOL processes.
110 *
111 * The values of Lr() and Lc() may be determined via a call to the func-
112 * tion PB_Cnumroc:
113 * Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
114 * Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
115 *
116 * Arguments
117 * =========
118 *
119 * TYPE (local input) pointer to a PBTYP_T structure
120 * On entry, TYPE is a pointer to a structure of type PBTYP_T,
121 * that contains type information (See pblas.h).
122 *
123 * N (global input) INTEGER
124 * On entry, N specifies the length of the subvectors to be
125 * swapped. N must be at least zero.
126 *
127 * X (local input/local output) pointer to CHAR
128 * On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
129 * is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
130 * MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
131 * Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
132 * Before entry, this array contains the local entries of the
133 * matrix X. On exit, sub( X ) is overwritten with sub( Y ).
134 *
135 * IX (global input) INTEGER
136 * On entry, IX specifies X's global row index, which points to
137 * the beginning of the submatrix sub( X ).
138 *
139 * JX (global input) INTEGER
140 * On entry, JX specifies X's global column index, which points
141 * to the beginning of the submatrix sub( X ).
142 *
143 * DESCX (global and local input) INTEGER array
144 * On entry, DESCX is an integer array of dimension DLEN_. This
145 * is the array descriptor for the matrix X.
146 *
147 * INCX (global input) INTEGER
148 * On entry, INCX specifies the global increment for the
149 * elements of X. Only two values of INCX are supported in
150 * this version, namely 1 and M_X. INCX must not be zero.
151 *
152 * Y (local input/local output) pointer to CHAR
153 * On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
154 * is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
155 * MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
156 * Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
157 * Before entry, this array contains the local entries of the
158 * matrix Y. On exit, sub( Y ) is overwritten with sub( X ).
159 *
160 * IY (global input) INTEGER
161 * On entry, IY specifies Y's global row index, which points to
162 * the beginning of the submatrix sub( Y ).
163 *
164 * JY (global input) INTEGER
165 * On entry, JY specifies Y's global column index, which points
166 * to the beginning of the submatrix sub( Y ).
167 *
168 * DESCY (global and local input) INTEGER array
169 * On entry, DESCY is an integer array of dimension DLEN_. This
170 * is the array descriptor for the matrix Y.
171 *
172 * INCY (global input) INTEGER
173 * On entry, INCY specifies the global increment for the
174 * elements of Y. Only two values of INCY are supported in
175 * this version, namely 1 and M_Y. INCY must not be zero.
176 *
177 * -- Written on April 1, 1998 by
178 * Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
179 *
180 * ---------------------------------------------------------------------
181 */
182 /*
183 * .. Local Scalars ..
184 */
185  char Xscope, Yscope, * top;
186  int RRorCC, XYm, XYn, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc,
187  XmyprocD, XmyprocR, XnprocsR, XprocR, Xrow, Ycol, Yii, YisR,
188  YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnprocsR, YprocR,
189  Yrow, csrc, ctxt, mycol, myrow, npcol, nprow, rsrc, size;
190 /* ..
191 * .. Executable Statements ..
192 *
193 */
194 /*
195 * Retrieve process grid information
196 */
197  Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
198 /*
199 * Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ...
200 */
201  PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
202  &Xrow, &Xcol );
203  if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 )
204  {
205  Xld = DESCX[LLD_]; Xlinc = Xld;
206  XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
207  XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
208  }
209  else
210  {
211  Xld = DESCX[LLD_]; Xlinc = 1;
212  XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
213  XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
214  }
215 /*
216 * Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ...
217 */
218  PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
219  &Yrow, &Ycol );
220  if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 )
221  {
222  Yld = DESCY[LLD_]; Ylinc = Yld;
223  YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
224  YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) );
225  }
226  else
227  {
228  Yld = DESCY[LLD_]; Ylinc = 1;
229  YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
230  YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) );
231  }
232 /*
233 * Are sub( X ) and sub( Y ) both row or column vectors ?
234 */
235  RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
236 /*
237 * Neither sub( X ) nor sub( Y ) are distributed
238 */
239  if( !XisR )
240  {
241 /*
242 * sub( X ) is not replicated
243 */
244  if( !( YisR ) )
245  {
246 /*
247 * sub( Y ) is not replicated
248 */
249  if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) )
250 /*
251 * If I am not in XprocR or YprocR, then return immediately
252 */
253  return;
254
255  size = TYPE->size;
256
257  if( RRorCC )
258  {
259 /*
260 * sub( X ) and sub( Y ) are both row or column vectors
261 */
262  if( XprocR == YprocR )
263  {
264 /*
265 * sub( X ) and sub( Y ) are in the same process row or column
266 */
267  TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y,
268  Yii, Yjj, Yld, size ), &Ylinc );
269  }
270  else
271  {
272 /*
273 * sub( X ) and sub( Y ) are in a different process row or column
274 */
275  if( XmyprocR == XprocR )
276  {
277 /*
278 * Send sub( X ) to where sub( Y ) resides, and receive sub( Y ) from the same
279 * location.
280 */
281  if( XisRow )
282  {
283  TYPE->Cgesd2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ),
284  Xld, YprocR, XmyprocD );
285  TYPE->Cgerv2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ),
286  Xld, YprocR, XmyprocD );
287  }
288  else
289  {
290  TYPE->Cgesd2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ),
291  Xld, XmyprocD, YprocR );
292  TYPE->Cgerv2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ),
293  Xld, XmyprocD, YprocR );
294  }
295  }
296
297  if( YmyprocR == YprocR )
298  {
299 /*
300 * Send sub( Y ) to where sub( X ) resides, and receive sub( X ) from the same
301 * location.
302 */
303  if( YisRow )
304  {
305  TYPE->Cgesd2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ),
306  Yld, XprocR, YmyprocD );
307  TYPE->Cgerv2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ),
308  Yld, XprocR, YmyprocD );
309  }
310  else
311  {
312  TYPE->Cgesd2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ),
313  Yld, YmyprocD, XprocR );
314  TYPE->Cgerv2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ),
315  Yld, YmyprocD, XprocR );
316  }
317  }
318  }
319  }
320  else
321  {
322 /*
323 * sub( X ) and sub( Y ) are not both row or column vectors
324 */
325  if( XisRow )
326  {
327  XYm = 1; XYn = N;
328  Xscope = CROW; Yscope = CCOLUMN;
329  rsrc = XprocR; csrc = YprocR;
330  }
331  else
332  {
333  XYm = N; XYn = 1;
334  Xscope = CCOLUMN; Yscope = CROW;
335  rsrc = YprocR; csrc = XprocR;
336  }
337
338  if( ( XmyprocR == XprocR ) && ( YmyprocR == YprocR ) )
339  {
340 /*
341 * If I am at the intersection of the process row and column, then swap and
342 * broadcast sub( X ) and sub( Y ) in their respective process scope.
343 */
344  TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
345  Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc );
346  top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET );
347  TYPE->Cgebs2d( ctxt, &Xscope, top, XYm, XYn, Mptr( X, Xii, Xjj,
348  Xld, size ), Xld );
349  top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET );
350  TYPE->Cgebs2d( ctxt, &Yscope, top, XYn, XYm, Mptr( Y, Yii, Yjj,
351  Yld, size ), Yld );
352  }
353  else if( XmyprocR == XprocR )
354  {
355  top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET );
356  TYPE->Cgebr2d( ctxt, &Xscope, top, XYm, XYn, Mptr( X, Xii, Xjj,
357  Xld, size ), Xld, rsrc, csrc );
358  }
359  else if( YmyprocR == YprocR )
360  {
361  top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET );
362  TYPE->Cgebr2d( ctxt, &Yscope, top, XYn, XYm, Mptr( Y, Yii, Yjj,
363  Yld, size ), Yld, rsrc, csrc );
364  }
365  }
366  }
367  else
368  {
369 /*
370 * sub( Y ) is replicated
371 */
372  size = TYPE->size;
373
374  if( YisRow ) { XYm = 1; XYn = N; }
375  else { XYm = N; XYn = 1; }
376
377  if( XmyprocR == XprocR )
378  {
379 /*
380 * If I am in the process row (resp. column) owning sub( X ), then swap and
381 * broadcast sub( Y ) in my column (resp. row).
382 */
383  TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y,
384  Yii, Yjj, Yld, size ), &Ylinc );
385
386  if( XisRow )
387  {
388  top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
389  TYPE->Cgebs2d( ctxt, COLUMN, top, XYm, XYn, Mptr( Y, Yii, Yjj,
390  Yld, size ), Yld );
391  }
392  else
393  {
394  top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
395  TYPE->Cgebs2d( ctxt, ROW, top, XYm, XYn, Mptr( Y, Yii, Yjj,
396  Yld, size ), Yld );
397  }
398  }
399  else
400  {
401 /*
402 * Otherwise, receive sub( Y )
403 */
404  if( XisRow )
405  {
406  top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
407  TYPE->Cgebr2d( ctxt, COLUMN, top, XYm, XYn, Mptr( Y, Yii, Yjj,
408  Yld, size ), Yld, XprocR, XmyprocD );
409  }
410  else
411  {
412  top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
413  TYPE->Cgebr2d( ctxt, ROW, top, XYm, XYn, Mptr( Y, Yii, Yjj,
414  Yld, size ), Yld, XmyprocD, XprocR );
415  }
416  }
417  }
418  }
419  else
420  {
421 /*
422 * sub( X ) is replicated
423 */
424  size = TYPE->size;
425
426  if( YisR || ( YmyprocR == YprocR ) )
427  {
428 /*
429 * If I own a piece of sub( Y ), then swap
430 */
431  TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii,
432  Yjj, Yld, size ), &Ylinc );
433  }
434
435  if( !YisR )
436  {
437 /*
438 * If sub( Y ) is not replicated, then broadcast the result to the other
439 * processes that own a piece of sub( X ), but were not involved in the
440 * above swap operation.
441 */
442  if( XisRow ) { XYm = 1; XYn = N; }
443  else { XYm = N; XYn = 1; }
444
445  if( YisRow )
446  {
447  top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
448  if( YmyprocR == YprocR )
449  TYPE->Cgebs2d( ctxt, COLUMN, top, XYm, XYn, Mptr( X, Xii, Xjj,
450  Xld, size ), Xld );
451  else
452  TYPE->Cgebr2d( ctxt, COLUMN, top, XYm, XYn, Mptr( X, Xii, Xjj,
453  Xld, size ), Xld, YprocR, YmyprocD );
454  }
455  else
456  {
457  top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
458  if( YmyprocR == YprocR )
459  TYPE->Cgebs2d( ctxt, ROW, top, XYm, XYn, Mptr( X, Xii, Xjj,
460  Xld, size ), Xld );
461  else
462  TYPE->Cgebr2d( ctxt, ROW, top, XYm, XYn, Mptr( X, Xii, Xjj,
463  Xld, size ), Xld, YmyprocD, YprocR );
464  }
465  }
466  }
467 /*
468 * End of PB_CpswapNN
469 */
470 }
M_
#define M_
Definition: PBtools.h:39
TYPE
#define TYPE
Definition: clamov.c:7
ROW
#define ROW
Definition: PBblacs.h:46
COLUMN
#define COLUMN
Definition: PBblacs.h:45
LLD_
#define LLD_
Definition: PBtools.h:47
CROW
#define CROW
Definition: PBblacs.h:21
TOP_GET
#define TOP_GET
Definition: PBblacs.h:50
PB_Ctop
char * PB_Ctop()
BCAST
#define BCAST
Definition: PBblacs.h:48
PB_Cinfog2l
void PB_Cinfog2l()
PB_CpswapNN
void PB_CpswapNN(PBTYP_T *TYPE, int N, char *X, int IX, int JX, int *DESCX, int INCX, char *Y, int IY, int JY, int *DESCY, int INCY)
Definition: PB_CpswapNN.c:24
CCOLUMN
#define CCOLUMN
Definition: PBblacs.h:20
Cblacs_gridinfo
void Cblacs_gridinfo()
PBTYP_T
Definition: pblas.h:325
Mptr
#define Mptr(a_, i_, j_, lda_, siz_)
Definition: PBtools.h:132
CTXT_
#define CTXT_
Definition: PBtools.h:38