SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
PB_CpswapNN.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CpswapNN( PBTYP_T * TYPE, Int N,
21 char * X, Int IX, Int JX, Int * DESCX, Int INCX,
22 char * Y, Int IY, Int JY, Int * DESCY, Int INCY )
23#else
24void PB_CpswapNN( TYPE, N, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY, INCY )
25/*
26* .. Scalar Arguments ..
27*/
28 Int INCX, INCY, IX, IY, JX, JY, N;
29 PBTYP_T * TYPE;
30/*
31* .. Array Arguments ..
32*/
33 Int * DESCX, * DESCY;
34 char * X, * Y;
35#endif
36{
37/*
38* Purpose
39* =======
40*
41* PB_CpswapNN swaps two subvectors,
42*
43* sub( Y ) := sub( X ) and sub( X ) := sub( Y )
44*
45* where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
46* X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
47*
48* sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
49* Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
50*
51* Both subvectors are assumed to be not distributed.
52*
53* Notes
54* =====
55*
56* A description vector is associated with each 2D block-cyclicly dis-
57* tributed matrix. This vector stores the information required to
58* establish the mapping between a matrix entry and its corresponding
59* process and memory location.
60*
61* In the following comments, the character _ should be read as
62* "of the distributed matrix". Let A be a generic term for any 2D
63* block cyclicly distributed matrix. Its description vector is DESC_A:
64*
65* NOTATION STORED IN EXPLANATION
66* ---------------- --------------- ------------------------------------
67* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
68* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
69* the NPROW x NPCOL BLACS process grid
70* A is distributed over. The context
71* itself is global, but the handle
72* (the integer value) may vary.
73* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
74* ted matrix A, M_A >= 0.
75* N_A (global) DESCA[ N_ ] The number of columns in the distri-
76* buted matrix A, N_A >= 0.
77* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
78* block of the matrix A, IMB_A > 0.
79* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
80* left block of the matrix A,
81* INB_A > 0.
82* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
83* bute the last M_A-IMB_A rows of A,
84* MB_A > 0.
85* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
86* bute the last N_A-INB_A columns of
87* A, NB_A > 0.
88* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
89* row of the matrix A is distributed,
90* NPROW > RSRC_A >= 0.
91* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
92* first column of A is distributed.
93* NPCOL > CSRC_A >= 0.
94* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
95* array storing the local blocks of
96* the distributed matrix A,
97* IF( Lc( 1, N_A ) > 0 )
98* LLD_A >= MAX( 1, Lr( 1, M_A ) )
99* ELSE
100* LLD_A >= 1.
101*
102* Let K be the number of rows of a matrix A starting at the global in-
103* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
104* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
105* receive if these K rows were distributed over NPROW processes. If K
106* is the number of columns of a matrix A starting at the global index
107* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
108* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
109* these K columns were distributed over NPCOL processes.
110*
111* The values of Lr() and Lc() may be determined via a call to the func-
112* tion PB_Cnumroc:
113* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
114* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
115*
116* Arguments
117* =========
118*
119* TYPE (local input) pointer to a PBTYP_T structure
120* On entry, TYPE is a pointer to a structure of type PBTYP_T,
121* that contains type information (See pblas.h).
122*
123* N (global input) INTEGER
124* On entry, N specifies the length of the subvectors to be
125* swapped. N must be at least zero.
126*
127* X (local input/local output) pointer to CHAR
128* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
129* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
130* MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
131* Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
132* Before entry, this array contains the local entries of the
133* matrix X. On exit, sub( X ) is overwritten with sub( Y ).
134*
135* IX (global input) INTEGER
136* On entry, IX specifies X's global row index, which points to
137* the beginning of the submatrix sub( X ).
138*
139* JX (global input) INTEGER
140* On entry, JX specifies X's global column index, which points
141* to the beginning of the submatrix sub( X ).
142*
143* DESCX (global and local input) INTEGER array
144* On entry, DESCX is an integer array of dimension DLEN_. This
145* is the array descriptor for the matrix X.
146*
147* INCX (global input) INTEGER
148* On entry, INCX specifies the global increment for the
149* elements of X. Only two values of INCX are supported in
150* this version, namely 1 and M_X. INCX must not be zero.
151*
152* Y (local input/local output) pointer to CHAR
153* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
154* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
155* MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
156* Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
157* Before entry, this array contains the local entries of the
158* matrix Y. On exit, sub( Y ) is overwritten with sub( X ).
159*
160* IY (global input) INTEGER
161* On entry, IY specifies Y's global row index, which points to
162* the beginning of the submatrix sub( Y ).
163*
164* JY (global input) INTEGER
165* On entry, JY specifies Y's global column index, which points
166* to the beginning of the submatrix sub( Y ).
167*
168* DESCY (global and local input) INTEGER array
169* On entry, DESCY is an integer array of dimension DLEN_. This
170* is the array descriptor for the matrix Y.
171*
172* INCY (global input) INTEGER
173* On entry, INCY specifies the global increment for the
174* elements of Y. Only two values of INCY are supported in
175* this version, namely 1 and M_Y. INCY must not be zero.
176*
177* -- Written on April 1, 1998 by
178* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
179*
180* ---------------------------------------------------------------------
181*/
182/*
183* .. Local Scalars ..
184*/
185 char Xscope, Yscope, * top;
186 Int RRorCC, XYm, XYn, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc,
187 XmyprocD, XmyprocR, XnprocsR, XprocR, Xrow, Ycol, Yii, YisR,
188 YisRow, Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnprocsR, YprocR,
189 Yrow, csrc, ctxt, mycol, myrow, npcol, nprow, rsrc, size;
190/* ..
191* .. Executable Statements ..
192*
193*/
194/*
195* Retrieve process grid information
196*/
197 Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
198/*
199* Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ...
200*/
201 PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
202 &Xrow, &Xcol );
203 if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 )
204 {
205 Xld = DESCX[LLD_]; Xlinc = Xld;
206 XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
207 XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
208 }
209 else
210 {
211 Xld = DESCX[LLD_]; Xlinc = 1;
212 XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
213 XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
214 }
215/*
216* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ...
217*/
218 PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
219 &Yrow, &Ycol );
220 if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 )
221 {
222 Yld = DESCY[LLD_]; Ylinc = Yld;
223 YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
224 YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) );
225 }
226 else
227 {
228 Yld = DESCY[LLD_]; Ylinc = 1;
229 YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
230 YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) );
231 }
232/*
233* Are sub( X ) and sub( Y ) both row or column vectors ?
234*/
235 RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
236/*
237* Neither sub( X ) nor sub( Y ) are distributed
238*/
239 if( !XisR )
240 {
241/*
242* sub( X ) is not replicated
243*/
244 if( !( YisR ) )
245 {
246/*
247* sub( Y ) is not replicated
248*/
249 if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) )
250/*
251* If I am not in XprocR or YprocR, then return immediately
252*/
253 return;
254
255 size = TYPE->size;
256
257 if( RRorCC )
258 {
259/*
260* sub( X ) and sub( Y ) are both row or column vectors
261*/
262 if( XprocR == YprocR )
263 {
264/*
265* sub( X ) and sub( Y ) are in the same process row or column
266*/
267 TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y,
268 Yii, Yjj, Yld, size ), &Ylinc );
269 }
270 else
271 {
272/*
273* sub( X ) and sub( Y ) are in a different process row or column
274*/
275 if( XmyprocR == XprocR )
276 {
277/*
278* Send sub( X ) to where sub( Y ) resides, and receive sub( Y ) from the same
279* location.
280*/
281 if( XisRow )
282 {
283 TYPE->Cgesd2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ),
284 Xld, YprocR, XmyprocD );
285 TYPE->Cgerv2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ),
286 Xld, YprocR, XmyprocD );
287 }
288 else
289 {
290 TYPE->Cgesd2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ),
291 Xld, XmyprocD, YprocR );
292 TYPE->Cgerv2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ),
293 Xld, XmyprocD, YprocR );
294 }
295 }
296
297 if( YmyprocR == YprocR )
298 {
299/*
300* Send sub( Y ) to where sub( X ) resides, and receive sub( X ) from the same
301* location.
302*/
303 if( YisRow )
304 {
305 TYPE->Cgesd2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ),
306 Yld, XprocR, YmyprocD );
307 TYPE->Cgerv2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ),
308 Yld, XprocR, YmyprocD );
309 }
310 else
311 {
312 TYPE->Cgesd2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ),
313 Yld, YmyprocD, XprocR );
314 TYPE->Cgerv2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ),
315 Yld, YmyprocD, XprocR );
316 }
317 }
318 }
319 }
320 else
321 {
322/*
323* sub( X ) and sub( Y ) are not both row or column vectors
324*/
325 if( XisRow )
326 {
327 XYm = 1; XYn = N;
328 Xscope = CROW; Yscope = CCOLUMN;
329 rsrc = XprocR; csrc = YprocR;
330 }
331 else
332 {
333 XYm = N; XYn = 1;
334 Xscope = CCOLUMN; Yscope = CROW;
335 rsrc = YprocR; csrc = XprocR;
336 }
337
338 if( ( XmyprocR == XprocR ) && ( YmyprocR == YprocR ) )
339 {
340/*
341* If I am at the intersection of the process row and column, then swap and
342* broadcast sub( X ) and sub( Y ) in their respective process scope.
343*/
344 TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc,
345 Mptr( Y, Yii, Yjj, Yld, size ), &Ylinc );
346 top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET );
347 TYPE->Cgebs2d( ctxt, &Xscope, top, XYm, XYn, Mptr( X, Xii, Xjj,
348 Xld, size ), Xld );
349 top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET );
350 TYPE->Cgebs2d( ctxt, &Yscope, top, XYn, XYm, Mptr( Y, Yii, Yjj,
351 Yld, size ), Yld );
352 }
353 else if( XmyprocR == XprocR )
354 {
355 top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET );
356 TYPE->Cgebr2d( ctxt, &Xscope, top, XYm, XYn, Mptr( X, Xii, Xjj,
357 Xld, size ), Xld, rsrc, csrc );
358 }
359 else if( YmyprocR == YprocR )
360 {
361 top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET );
362 TYPE->Cgebr2d( ctxt, &Yscope, top, XYn, XYm, Mptr( Y, Yii, Yjj,
363 Yld, size ), Yld, rsrc, csrc );
364 }
365 }
366 }
367 else
368 {
369/*
370* sub( Y ) is replicated
371*/
372 size = TYPE->size;
373
374 if( YisRow ) { XYm = 1; XYn = N; }
375 else { XYm = N; XYn = 1; }
376
377 if( XmyprocR == XprocR )
378 {
379/*
380* If I am in the process row (resp. column) owning sub( X ), then swap and
381* broadcast sub( Y ) in my column (resp. row).
382*/
383 TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y,
384 Yii, Yjj, Yld, size ), &Ylinc );
385
386 if( XisRow )
387 {
388 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
389 TYPE->Cgebs2d( ctxt, COLUMN, top, XYm, XYn, Mptr( Y, Yii, Yjj,
390 Yld, size ), Yld );
391 }
392 else
393 {
394 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
395 TYPE->Cgebs2d( ctxt, ROW, top, XYm, XYn, Mptr( Y, Yii, Yjj,
396 Yld, size ), Yld );
397 }
398 }
399 else
400 {
401/*
402* Otherwise, receive sub( Y )
403*/
404 if( XisRow )
405 {
406 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
407 TYPE->Cgebr2d( ctxt, COLUMN, top, XYm, XYn, Mptr( Y, Yii, Yjj,
408 Yld, size ), Yld, XprocR, XmyprocD );
409 }
410 else
411 {
412 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
413 TYPE->Cgebr2d( ctxt, ROW, top, XYm, XYn, Mptr( Y, Yii, Yjj,
414 Yld, size ), Yld, XmyprocD, XprocR );
415 }
416 }
417 }
418 }
419 else
420 {
421/*
422* sub( X ) is replicated
423*/
424 size = TYPE->size;
425
426 if( YisR || ( YmyprocR == YprocR ) )
427 {
428/*
429* If I own a piece of sub( Y ), then swap
430*/
431 TYPE->Fswap( &N, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii,
432 Yjj, Yld, size ), &Ylinc );
433 }
434
435 if( !YisR )
436 {
437/*
438* If sub( Y ) is not replicated, then broadcast the result to the other
439* processes that own a piece of sub( X ), but were not involved in the
440* above swap operation.
441*/
442 if( XisRow ) { XYm = 1; XYn = N; }
443 else { XYm = N; XYn = 1; }
444
445 if( YisRow )
446 {
447 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
448 if( YmyprocR == YprocR )
449 TYPE->Cgebs2d( ctxt, COLUMN, top, XYm, XYn, Mptr( X, Xii, Xjj,
450 Xld, size ), Xld );
451 else
452 TYPE->Cgebr2d( ctxt, COLUMN, top, XYm, XYn, Mptr( X, Xii, Xjj,
453 Xld, size ), Xld, YprocR, YmyprocD );
454 }
455 else
456 {
457 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
458 if( YmyprocR == YprocR )
459 TYPE->Cgebs2d( ctxt, ROW, top, XYm, XYn, Mptr( X, Xii, Xjj,
460 Xld, size ), Xld );
461 else
462 TYPE->Cgebr2d( ctxt, ROW, top, XYm, XYn, Mptr( X, Xii, Xjj,
463 Xld, size ), Xld, YmyprocD, YprocR );
464 }
465 }
466 }
467/*
468* End of PB_CpswapNN
469*/
470}
#define Int
Definition Bconfig.h:22
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define CTXT_
Definition PBtools.h:38
void PB_Cinfog2l()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
char * PB_Ctop()
void PB_CpswapNN()
#define M_
Definition PBtools.h:39
#define TYPE
Definition clamov.c:7