SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CpaxpbyNN.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CpaxpbyNN( PBTYP_T * TYPE, char * CONJUG, Int M, Int N,
21 char * ALPHA,
22 char * A, Int IA, Int JA, Int * DESCA, char * AROC,
23 char * BETA,
24 char * B, Int IB, Int JB, Int * DESCB, char * BROC )
25#else
26void PB_CpaxpbyNN( TYPE, CONJUG, M, N, ALPHA, A, IA, JA, DESCA, AROC,
27 BETA, B, IB, JB, DESCB, BROC )
28/*
29* .. Scalar Arguments ..
30*/
31 char * AROC, * BROC, * CONJUG;
32 Int IA, IB, JA, JB, M, N;
33 char * ALPHA, * BETA;
34 PBTYP_T * TYPE;
35/*
36* .. Array Arguments ..
37*/
38 Int * DESCA, * DESCB;
39 char * A, * B;
40#endif
41{
42/*
43* Purpose
44* =======
45*
46* PB_CpaxpbyNN adds one submatrix to another,
47*
48* sub( B ) := beta * sub( B ) + alpha * sub( A ), or,
49*
50* sub( B ) := beta * sub( B ) + alpha * conjg( sub( A ) ),
51*
52* where both submatrices are not distributed; sub( A ) always denotes
53* A(IA:IA+M-1,JA:JA+N-1). When AROC is 'R' or 'r' sub( A ) resides in
54* a process row, otherwise sub( A ) resides in a process column. When
55* sub( A ) resides in a process row and BROC is 'R' or 'r' or
56* sub( A ) resides in a process column and BROC is 'C' or 'c', then
57* sub( B ) denotes B(IB:IB+M-1,JB:JB+N-1), and B(IB:IB+N-1,JB:JB+M-1)
58* otherwise.
59*
60* Notes
61* =====
62*
63* A description vector is associated with each 2D block-cyclicly dis-
64* tributed matrix. This vector stores the information required to
65* establish the mapping between a matrix entry and its corresponding
66* process and memory location.
67*
68* In the following comments, the character _ should be read as
69* "of the distributed matrix". Let A be a generic term for any 2D
70* block cyclicly distributed matrix. Its description vector is DESC_A:
71*
72* NOTATION STORED IN EXPLANATION
73* ---------------- --------------- ------------------------------------
74* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
75* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
76* the NPROW x NPCOL BLACS process grid
77* A is distributed over. The context
78* itself is global, but the handle
79* (the integer value) may vary.
80* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
81* ted matrix A, M_A >= 0.
82* N_A (global) DESCA[ N_ ] The number of columns in the distri-
83* buted matrix A, N_A >= 0.
84* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
85* block of the matrix A, IMB_A > 0.
86* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
87* left block of the matrix A,
88* INB_A > 0.
89* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
90* bute the last M_A-IMB_A rows of A,
91* MB_A > 0.
92* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
93* bute the last N_A-INB_A columns of
94* A, NB_A > 0.
95* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
96* row of the matrix A is distributed,
97* NPROW > RSRC_A >= 0.
98* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
99* first column of A is distributed.
100* NPCOL > CSRC_A >= 0.
101* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
102* array storing the local blocks of
103* the distributed matrix A,
104* IF( Lc( 1, N_A ) > 0 )
105* LLD_A >= MAX( 1, Lr( 1, M_A ) )
106* ELSE
107* LLD_A >= 1.
108*
109* Let K be the number of rows of a matrix A starting at the global in-
110* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
111* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
112* receive if these K rows were distributed over NPROW processes. If K
113* is the number of columns of a matrix A starting at the global index
114* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
115* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
116* these K columns were distributed over NPCOL processes.
117*
118* The values of Lr() and Lc() may be determined via a call to the func-
119* tion PB_Cnumroc:
120* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
121* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
122*
123* Arguments
124* =========
125*
126* TYPE (local input) pointer to a PBTYP_T structure
127* On entry, TYPE is a pointer to a structure of type PBTYP_T,
128* that contains type information (See pblas.h).
129*
130* CONJUG (global input) pointer to CHAR
131* On entry, CONJUG specifies whether conjg( sub( A ) ) or
132* sub( A ) should be added to sub( B ) as follows:
133* CONJUG = 'N' or 'n':
134* sub( B ) := beta*sub( B ) + alpha*sub( A ),
135* otherwise
136* sub( B ) := beta*sub( B ) + alpha*conjg( sub( A ) ).
137*
138* M (global input) INTEGER
139* On entry, M specifies the number of rows of the submatrix
140* sub( A ). M must be at least zero.
141*
142* N (global input) INTEGER
143* On entry, N specifies the number of columns of the submatrix
144* sub( A ). N must be at least zero.
145*
146* ALPHA (global input) pointer to CHAR
147* On entry, ALPHA specifies the scalar alpha. When ALPHA is
148* supplied as zero then the local entries of the array A cor-
149* responding to the entries of the submatrix sub( A ) need not
150* be set on input.
151*
152* A (local input) pointer to CHAR
153* On entry, A is an array of dimension (LLD_A, Ka), where LLD_A
154* is at least MAX( 1, Lr( 1, IA+M-1 ) ), and, Ka is at least
155* Lc( 1, JA+N-1 ). Before entry, this array contains the local
156* entries of the matrix A.
157*
158* IA (global input) INTEGER
159* On entry, IA specifies A's global row index, which points to
160* the beginning of the submatrix sub( A ).
161*
162* JA (global input) INTEGER
163* On entry, JA specifies A's global column index, which points
164* to the beginning of the submatrix sub( A ).
165*
166* DESCA (global and local input) INTEGER array
167* On entry, DESCA is an integer array of dimension DLEN_. This
168* is the array descriptor for the matrix A.
169*
170* AROC (global input) pointer to CHAR
171* On entry, AROC specifies the orientation of the subvector
172* sub( A ). When AROC is 'R' or 'r', sub( A ) is a row vector,
173* and a column vector otherwise.
174*
175* BETA (global input) pointer to CHAR
176* On entry, BETA specifies the scalar beta. When BETA is sup-
177* plied as zero then the local entries of the array B corres-
178* ponding to the entries of the submatrix sub( B ) need not be
179* set on input.
180*
181* B (local input/local output) pointer to CHAR
182* On entry, B is an array of dimension (LLD_B, Kb), where LLD_B
183* is at least MAX( 1, Lr( 1, IB+M-1 ) ) when sub( A ) and
184* sub( B ) are both distributed along a process column or a
185* process row. In that case, Kb is at least Lc( 1, JB+N-1 ).
186* Otherwise, LLD_B is at least MAX( 1, Lr( 1, IB+N-1 ) ) and
187* Kb is at least Lc( 1, JB+M-1 ). Before entry, this array
188* contains the local entries of the matrix B. On exit, sub( B )
189* is overwritten with the updated submatrix.
190*
191* IB (global input) INTEGER
192* On entry, IB specifies B's global row index, which points to
193* the beginning of the submatrix sub( B ).
194*
195* JB (global input) INTEGER
196* On entry, JB specifies B's global column index, which points
197* to the beginning of the submatrix sub( B ).
198*
199* DESCB (global and local input) INTEGER array
200* On entry, DESCB is an integer array of dimension DLEN_. This
201* is the array descriptor for the matrix B.
202*
203* BROC (global input) pointer to CHAR
204* On entry, BROC specifies the orientation of the subvector
205* sub( B ). When BROC is 'R' or 'r', sub( B ) is a row vector,
206* and a column vector otherwise.
207*
208* -- Written on April 1, 1998 by
209* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
210*
211* ---------------------------------------------------------------------
212*/
213/*
214* .. Local Scalars ..
215*/
216 char scope, * top;
217 Int Acol, Aii, AisR, AisRow, Ajj, Ald, AmyprocD, AmyprocR,
218 AnprocsD, AnprocsR, AprocR, Arow, Bcol, Bii, BisR, BisRow,
219 Bjj, Bld, BmyprocD, BmyprocR, BnprocsD, BnprocsR, BprocR,
220 Brow, RRorCC, csrc, ctxt, iroca, mycol, myrow, npcol, nprow,
221 p, rsrc, size;
222 MMADD_T add;
223/*
224* .. Local Arrays ..
225*/
226 char * buf = NULL;
227/* ..
228* .. Executable Statements ..
229*
230*/
231/*
232* Retrieve process grid information
233*/
234 Cblacs_gridinfo( ( ctxt = DESCA[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
235/*
236* Retrieve sub( A )'s local information: Aii, Ajj, Arow, Acol ...
237*/
238 PB_Cinfog2l( IA, JA, DESCA, nprow, npcol, myrow, mycol, &Aii, &Ajj,
239 &Arow, &Acol );
240 if( ( AisRow = ( Mupcase( AROC[0] ) == CROW ) ) != 0 )
241 {
242 Ald = DESCA[LLD_]; AmyprocD = mycol; AnprocsD = npcol;
243 AprocR = Arow; AmyprocR = myrow; AnprocsR = nprow;
244 AisR = ( ( Arow == -1 ) || ( AnprocsR == 1 ) );
245 }
246 else
247 {
248 Ald = DESCA[LLD_]; AmyprocD = myrow; AnprocsD = nprow;
249 AprocR = Acol; AmyprocR = mycol; AnprocsR = npcol;
250 AisR = ( ( Acol == -1 ) || ( AnprocsR == 1 ) );
251 }
252/*
253* Retrieve sub( B )'s local information: Bii, Bjj, Brow, Bcol ...
254*/
255 PB_Cinfog2l( IB, JB, DESCB, nprow, npcol, myrow, mycol, &Bii, &Bjj,
256 &Brow, &Bcol );
257 if( ( BisRow = ( Mupcase( BROC[0] ) == CROW ) ) != 0 )
258 {
259 Bld = DESCB[LLD_]; BmyprocD = mycol; BnprocsD = npcol;
260 BprocR = Brow; BmyprocR = myrow; BnprocsR = nprow;
261 BisR = ( ( Brow == -1 ) || ( BnprocsR == 1 ) );
262 }
263 else
264 {
265 Bld = DESCB[LLD_]; BmyprocD = myrow; BnprocsD = nprow;
266 BprocR = Bcol; BmyprocR = mycol; BnprocsR = npcol;
267 BisR = ( ( Bcol == -1 ) || ( BnprocsR == 1 ) );
268 }
269/*
270* Are sub( A ) and sub( B ) both row or column vectors ?
271*/
272 RRorCC = ( ( AisRow && BisRow ) || ( !( AisRow ) && !( BisRow ) ) );
273/*
274* Neither sub( A ) nor sub( B ) are distributed
275*/
276 if( !AisR )
277 {
278/*
279* sub( A ) is not replicated
280*/
281 if( !( BisR ) )
282 {
283/*
284* sub( B ) is not replicated
285*/
286 if( ( AmyprocR != AprocR ) && ( BmyprocR != BprocR ) )
287/*
288* If I am not in AprocR or BprocR, then return immediately
289*/
290 return;
291
292 size = TYPE->size;
293
294 if( RRorCC )
295 {
296/*
297* sub( A ) and sub( B ) are both row or column vectors
298*/
299 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmcadd;
300 else add = TYPE->Fmmadd;
301
302 if( AprocR == BprocR )
303 {
304 add( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald, BETA,
305 Mptr( B, Bii, Bjj, Bld, size ), &Bld );
306 }
307 else
308 {
309/*
310* sub( A ) and sub( B ) are in a different process row or column
311*/
312 if( AmyprocR == AprocR )
313 {
314/*
315* Send sub( A ) to where sub( B ) resides.
316*/
317 if( AisRow )
318 TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald, size ),
319 Ald, BprocR, AmyprocD );
320 else
321 TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald, size ),
322 Ald, AmyprocD, BprocR );
323 }
324/*
325* receive sub( A ) and add it to sub( B )
326*/
327 if( BmyprocR == BprocR )
328 {
329 buf = PB_Cmalloc( M * N * size );
330 if( BisRow )
331 TYPE->Cgerv2d( ctxt, M, N, buf, M, AprocR, BmyprocD );
332 else
333 TYPE->Cgerv2d( ctxt, M, N, buf, M, BmyprocD, AprocR );
334 add( &M, &N, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld,
335 size ), &Bld );
336 if( buf ) free( buf );
337 }
338 }
339 }
340 else
341 {
342/*
343* sub( A ) and sub( B ) are not both row or column vectors
344*/
345 if( Mupcase( CONJUG[0] ) != CNOCONJG ) add = TYPE->Fmmtcadd;
346 else add = TYPE->Fmmtadd;
347
348 iroca = 0;
349 for( p = 0; p < BnprocsD; p++ )
350 {
351 if( ( AprocR == p ) && ( BprocR == iroca ) )
352 {
353 if( ( AmyprocR == p ) && ( AmyprocD == iroca ) )
354 {
355 add( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ), &Ald,
356 BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
357 }
358 }
359 else
360 {
361 if( ( AmyprocR == AprocR ) && ( AmyprocD == iroca ) )
362 {
363 if( AisRow )
364 TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald,
365 size ), Ald, p, BprocR );
366 else
367 TYPE->Cgesd2d( ctxt, M, N, Mptr( A, Aii, Ajj, Ald,
368 size ), Ald, BprocR, p );
369 }
370 if( ( BmyprocR == BprocR ) && ( BmyprocD == p ) )
371 {
372 buf = PB_Cmalloc( M * N * size );
373 if( AisRow )
374 TYPE->Cgerv2d( ctxt, M, N, buf, M, AprocR, iroca );
375 else
376 TYPE->Cgerv2d( ctxt, M, N, buf, M, iroca, AprocR );
377 add( &M, &N, ALPHA, buf, &M, BETA, Mptr( B, Bii, Bjj, Bld,
378 size ), &Bld );
379 if( buf ) free( buf );
380 }
381 }
382 iroca = MModAdd1( iroca, AnprocsD );
383 }
384 }
385 }
386 else
387 {
388/*
389* sub( B ) is replicated
390*/
391 size = TYPE->size;
392
393 if( AmyprocR == AprocR )
394 {
395 if( RRorCC )
396 {
397 if( Mupcase( CONJUG[0] ) != CNOCONJG )
398 TYPE->Fmmcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
399 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
400 &Bld );
401 else
402 TYPE->Fmmadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
403 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
404 &Bld );
405 }
406 else
407 {
408 if( Mupcase( CONJUG[0] ) != CNOCONJG )
409 TYPE->Fmmtcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
410 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
411 &Bld );
412 else
413 TYPE->Fmmtadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
414 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
415 &Bld );
416 }
417 if( AisRow ) { scope = CCOLUMN; } else { scope = CROW; }
418 top = PB_Ctop( &ctxt, BCAST, &scope, TOP_GET );
419 if( RRorCC )
420 TYPE->Cgebs2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld,
421 size ), Bld );
422 else
423 TYPE->Cgebs2d( ctxt, &scope, top, N, M, Mptr( B, Bii, Bjj, Bld,
424 size ), Bld );
425 }
426 else
427 {
428 if( AisRow ) { scope = CCOLUMN; rsrc = AprocR; csrc = AmyprocD; }
429 else { scope = CROW; rsrc = AmyprocD; csrc = AprocR; }
430 top = PB_Ctop( &ctxt, BCAST, &scope, TOP_GET );
431 if( RRorCC )
432 TYPE->Cgebr2d( ctxt, &scope, top, M, N, Mptr( B, Bii, Bjj, Bld,
433 size ), Bld, rsrc, csrc );
434 else
435 TYPE->Cgebr2d( ctxt, &scope, top, N, M, Mptr( B, Bii, Bjj, Bld,
436 size ), Bld, rsrc, csrc );
437 }
438 }
439 }
440 else
441 {
442/*
443* sub( A ) is replicated
444*/
445 if( BisR || ( BmyprocR == BprocR ) )
446 {
447/*
448* If I own a piece of sub( B ), then add sub( A ) to it
449*/
450 size = TYPE->size;
451 if( RRorCC )
452 {
453 if( Mupcase( CONJUG[0] ) != CNOCONJG )
454 TYPE->Fmmcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
455 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
456 &Bld );
457 else
458 TYPE->Fmmadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
459 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ), &Bld );
460 }
461 else
462 {
463 if( Mupcase( CONJUG[0] ) != CNOCONJG )
464 TYPE->Fmmtcadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
465 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
466 &Bld );
467 else
468 TYPE->Fmmtadd( &M, &N, ALPHA, Mptr( A, Aii, Ajj, Ald, size ),
469 &Ald, BETA, Mptr( B, Bii, Bjj, Bld, size ),
470 &Bld );
471 }
472 }
473 }
474/*
475* End of PB_CpaxpbyNN
476*/
477}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* MMADD_T)()
Definition pblas.h:288
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define CROW
Definition PBblacs.h:21
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define CNOCONJG
Definition PBblas.h:19
#define CTXT_
Definition PBtools.h:38
char * PB_Cmalloc()
void PB_Cinfog2l()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
char * PB_Ctop()
#define MModAdd1(I, d)
Definition PBtools.h:100
#define Mupcase(C)
Definition PBtools.h:83
void PB_CpaxpbyNN()
#define TYPE
Definition clamov.c:7