SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
PB_CpdotNN.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* -- PBLAS auxiliary routine (version 2.0) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* April 1, 1998
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "../pblas.h"
14#include "../PBpblas.h"
15#include "../PBtools.h"
16#include "../PBblacs.h"
17#include "../PBblas.h"
18
19#ifdef __STDC__
20void PB_CpdotNN( PBTYP_T * TYPE, Int N, char * DOT,
21 char * X, Int IX, Int JX, Int * DESCX, Int INCX,
22 char * Y, Int IY, Int JY, Int * DESCY, Int INCY,
23 VVDOT_T FDOT )
24#else
25void PB_CpdotNN( TYPE, N, DOT, X, IX, JX, DESCX, INCX, Y, IY, JY, DESCY,
26 INCY, FDOT )
27/*
28* .. Scalar Arguments ..
29*/
30 Int INCX, INCY, IX, IY, JX, JY, N;
31 char * DOT;
32 PBTYP_T * TYPE;
33 VVDOT_T FDOT;
34/*
35* .. Array Arguments ..
36*/
37 Int * DESCX, * DESCY;
38 char * X, * Y;
39#endif
40{
41/*
42* Purpose
43* =======
44*
45* PB_CpdotNN forms the dot product of two subvectors,
46*
47* DOT := sub( X )**T * sub( Y ) or DOT := sub( X )**H * sub( Y ),
48*
49* where
50*
51* sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
52* X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X, and,
53*
54* sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
55* Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
56*
57* Both subvectors are assumed to be not distributed.
58*
59* Notes
60* =====
61*
62* A description vector is associated with each 2D block-cyclicly dis-
63* tributed matrix. This vector stores the information required to
64* establish the mapping between a matrix entry and its corresponding
65* process and memory location.
66*
67* In the following comments, the character _ should be read as
68* "of the distributed matrix". Let A be a generic term for any 2D
69* block cyclicly distributed matrix. Its description vector is DESC_A:
70*
71* NOTATION STORED IN EXPLANATION
72* ---------------- --------------- ------------------------------------
73* DTYPE_A (global) DESCA[ DTYPE_ ] The descriptor type.
74* CTXT_A (global) DESCA[ CTXT_ ] The BLACS context handle, indicating
75* the NPROW x NPCOL BLACS process grid
76* A is distributed over. The context
77* itself is global, but the handle
78* (the integer value) may vary.
79* M_A (global) DESCA[ M_ ] The number of rows in the distribu-
80* ted matrix A, M_A >= 0.
81* N_A (global) DESCA[ N_ ] The number of columns in the distri-
82* buted matrix A, N_A >= 0.
83* IMB_A (global) DESCA[ IMB_ ] The number of rows of the upper left
84* block of the matrix A, IMB_A > 0.
85* INB_A (global) DESCA[ INB_ ] The number of columns of the upper
86* left block of the matrix A,
87* INB_A > 0.
88* MB_A (global) DESCA[ MB_ ] The blocking factor used to distri-
89* bute the last M_A-IMB_A rows of A,
90* MB_A > 0.
91* NB_A (global) DESCA[ NB_ ] The blocking factor used to distri-
92* bute the last N_A-INB_A columns of
93* A, NB_A > 0.
94* RSRC_A (global) DESCA[ RSRC_ ] The process row over which the first
95* row of the matrix A is distributed,
96* NPROW > RSRC_A >= 0.
97* CSRC_A (global) DESCA[ CSRC_ ] The process column over which the
98* first column of A is distributed.
99* NPCOL > CSRC_A >= 0.
100* LLD_A (local) DESCA[ LLD_ ] The leading dimension of the local
101* array storing the local blocks of
102* the distributed matrix A,
103* IF( Lc( 1, N_A ) > 0 )
104* LLD_A >= MAX( 1, Lr( 1, M_A ) )
105* ELSE
106* LLD_A >= 1.
107*
108* Let K be the number of rows of a matrix A starting at the global in-
109* dex IA,i.e, A( IA:IA+K-1, : ). Lr( IA, K ) denotes the number of rows
110* that the process of row coordinate MYROW ( 0 <= MYROW < NPROW ) would
111* receive if these K rows were distributed over NPROW processes. If K
112* is the number of columns of a matrix A starting at the global index
113* JA, i.e, A( :, JA:JA+K-1, : ), Lc( JA, K ) denotes the number of co-
114* lumns that the process MYCOL ( 0 <= MYCOL < NPCOL ) would receive if
115* these K columns were distributed over NPCOL processes.
116*
117* The values of Lr() and Lc() may be determined via a call to the func-
118* tion PB_Cnumroc:
119* Lr( IA, K ) = PB_Cnumroc( K, IA, IMB_A, MB_A, MYROW, RSRC_A, NPROW )
120* Lc( JA, K ) = PB_Cnumroc( K, JA, INB_A, NB_A, MYCOL, CSRC_A, NPCOL )
121*
122* Arguments
123* =========
124*
125* TYPE (local input) pointer to a PBTYP_T structure
126* On entry, TYPE is a pointer to a structure of type PBTYP_T,
127* that contains type information (See pblas.h).
128*
129* N (global input) INTEGER
130* On entry, N specifies the length of the subvectors to be
131* multiplied. N must be at least zero.
132*
133* DOT (local output) pointer to CHAR
134* On exit, DOT specifies the dot product of the two subvectors
135* sub( X ) and sub( Y ) only in their scope (See below for fur-
136* ther details).
137*
138* X (local input) pointer to CHAR
139* On entry, X is an array of dimension (LLD_X, Kx), where LLD_X
140* is at least MAX( 1, Lr( 1, IX ) ) when INCX = M_X and
141* MAX( 1, Lr( 1, IX+N-1 ) ) otherwise, and, Kx is at least
142* Lc( 1, JX+N-1 ) when INCX = M_X and Lc( 1, JX ) otherwise.
143* Before entry, this array contains the local entries of the
144* matrix X.
145*
146* IX (global input) INTEGER
147* On entry, IX specifies X's global row index, which points to
148* the beginning of the submatrix sub( X ).
149*
150* JX (global input) INTEGER
151* On entry, JX specifies X's global column index, which points
152* to the beginning of the submatrix sub( X ).
153*
154* DESCX (global and local input) INTEGER array
155* On entry, DESCX is an integer array of dimension DLEN_. This
156* is the array descriptor for the matrix X.
157*
158* INCX (global input) INTEGER
159* On entry, INCX specifies the global increment for the
160* elements of X. Only two values of INCX are supported in
161* this version, namely 1 and M_X. INCX must not be zero.
162*
163* Y (local input) pointer to CHAR
164* On entry, Y is an array of dimension (LLD_Y, Ky), where LLD_Y
165* is at least MAX( 1, Lr( 1, IY ) ) when INCY = M_Y and
166* MAX( 1, Lr( 1, IY+N-1 ) ) otherwise, and, Ky is at least
167* Lc( 1, JY+N-1 ) when INCY = M_Y and Lc( 1, JY ) otherwise.
168* Before entry, this array contains the local entries of the
169* matrix Y.
170*
171* IY (global input) INTEGER
172* On entry, IY specifies Y's global row index, which points to
173* the beginning of the submatrix sub( Y ).
174*
175* JY (global input) INTEGER
176* On entry, JY specifies Y's global column index, which points
177* to the beginning of the submatrix sub( Y ).
178*
179* DESCY (global and local input) INTEGER array
180* On entry, DESCY is an integer array of dimension DLEN_. This
181* is the array descriptor for the matrix Y.
182*
183* INCY (global input) INTEGER
184* On entry, INCY specifies the global increment for the
185* elements of Y. Only two values of INCY are supported in
186* this version, namely 1 and M_Y. INCY must not be zero.
187*
188* FDOT (local input) pointer to a function of type VVDOT
189* On entry, FDOT points to a subroutine that computes the local
190* dot product of two vectors.
191*
192* Further Details
193* ===============
194*
195* When the result of a vector-oriented PBLAS call is a scalar, this
196* scalar is set only within the process scope which owns the vector(s)
197* being operated on. Let sub( X ) be a generic term for the input vec-
198* tor(s). Then, the processes owning the correct the answer is determi-
199* ned as follows: if an operation involves more than one vector, the
200* processes receiving the result will be the union of the following set
201* of processes for each vector:
202*
203* If N = 1, M_X = 1 and INCX = 1, then one cannot determine if a pro-
204* cess row or process column owns the vector operand, therefore only
205* the process owning sub( X ) receives the correct result;
206*
207* If INCX = M_X, then sub( X ) is a vector distributed over a process
208* row. Each process in this row receives the result;
209*
210* If INCX = 1, then sub( X ) is a vector distributed over a process
211* column. Each process in this column receives the result;
212*
213* -- Written on April 1, 1998 by
214* Antoine Petitet, University of Tennessee, Knoxville 37996, USA.
215*
216* ---------------------------------------------------------------------
217*/
218/*
219* .. Local Scalars ..
220*/
221 char Xscope, Yscope, * top;
222 Int RRorCC, Xcol, Xii, XisR, XisRow, Xjj, Xld, Xlinc, XmyprocD,
223 XmyprocR, XnprocsR, XprocR, Xrow, Ycol, Yii, YisR, YisRow,
224 Yjj, Yld, Ylinc, YmyprocD, YmyprocR, YnprocsR, YprocR, Yrow,
225 csrc, ctxt, ione=1, mycol, myrow, npcol, nprow, rsrc, size;
226/*
227* .. Local Arrays ..
228*/
229 char * buf = NULL;
230/* ..
231* .. Executable Statements ..
232*
233*/
234/*
235* Retrieve process grid information
236*/
237 Cblacs_gridinfo( ( ctxt = DESCX[CTXT_] ), &nprow, &npcol, &myrow, &mycol );
238/*
239* Retrieve sub( X )'s local information: Xii, Xjj, Xrow, Xcol ...
240*/
241 PB_Cinfog2l( IX, JX, DESCX, nprow, npcol, myrow, mycol, &Xii, &Xjj,
242 &Xrow, &Xcol );
243 if( ( XisRow = ( INCX == DESCX[M_] ) ) != 0 )
244 {
245 Xld = DESCX[LLD_]; Xlinc = Xld;
246 XmyprocD = mycol; XprocR = Xrow; XmyprocR = myrow; XnprocsR = nprow;
247 XisR = ( ( Xrow == -1 ) || ( XnprocsR == 1 ) );
248 }
249 else
250 {
251 Xld = DESCX[LLD_]; Xlinc = 1;
252 XmyprocD = myrow; XprocR = Xcol; XmyprocR = mycol; XnprocsR = npcol;
253 XisR = ( ( Xcol == -1 ) || ( XnprocsR == 1 ) );
254 }
255/*
256* Retrieve sub( Y )'s local information: Yii, Yjj, Yrow, Ycol ...
257*/
258 PB_Cinfog2l( IY, JY, DESCY, nprow, npcol, myrow, mycol, &Yii, &Yjj,
259 &Yrow, &Ycol );
260 if( ( YisRow = ( INCY == DESCY[M_] ) ) != 0 )
261 {
262 Yld = DESCY[LLD_]; Ylinc = Yld;
263 YmyprocD = mycol; YprocR = Yrow; YmyprocR = myrow; YnprocsR = nprow;
264 YisR = ( ( Yrow == -1 ) || ( YnprocsR == 1 ) );
265 }
266 else
267 {
268 Yld = DESCY[LLD_]; Ylinc = 1;
269 YmyprocD = myrow; YprocR = Ycol; YmyprocR = mycol; YnprocsR = npcol;
270 YisR = ( ( Ycol == -1 ) || ( YnprocsR == 1 ) );
271 }
272/*
273* Are sub( X ) and sub( Y ) both row or column vectors ?
274*/
275 RRorCC = ( ( XisRow && YisRow ) || ( !( XisRow ) && !( YisRow ) ) );
276/*
277* Neither sub( X ) nor sub( Y ) are distributed
278*/
279 if( !XisR )
280 {
281/*
282* sub( X ) is not replicated
283*/
284 if( !( YisR ) )
285 {
286/*
287* sub( Y ) is not replicated
288*/
289 if( ( XmyprocR != XprocR ) && ( YmyprocR != YprocR ) )
290/*
291* If I am not in XprocR or YprocR, then return immediately
292*/
293 return;
294
295 size = TYPE->size;
296
297 if( RRorCC )
298 {
299/*
300* sub( X ) and sub( Y ) are both row or column vectors
301*/
302 if( XprocR == YprocR )
303 {
304/*
305* sub( X ) and sub( Y ) are in the same process row or column
306*/
307 FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y,
308 Yii, Yjj, Yld, size ), &Ylinc );
309 }
310 else
311 {
312/*
313* sub( X ) and sub( Y ) are in a different process row or column
314*/
315 if( XmyprocR == XprocR )
316 {
317 buf = PB_Cmalloc( N * size );
318/*
319* Send sub( X ) to where sub( Y ) resides, and receive sub( Y ) from the same
320* location.
321*/
322 if( XisRow )
323 {
324 TYPE->Cgesd2d( ctxt, 1, N, Mptr( X, Xii, Xjj, Xld, size ),
325 Xld, YprocR, XmyprocD );
326 TYPE->Cgerv2d( ctxt, 1, N, buf, 1, YprocR, XmyprocD );
327 }
328 else
329 {
330 TYPE->Cgesd2d( ctxt, N, 1, Mptr( X, Xii, Xjj, Xld, size ),
331 Xld, XmyprocD, YprocR );
332 TYPE->Cgerv2d( ctxt, N, 1, buf, N, XmyprocD, YprocR );
333 }
334 FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, buf,
335 &ione );
336 if( buf ) free( buf );
337 }
338
339 if( YmyprocR == YprocR )
340 {
341 buf = PB_Cmalloc( N * size );
342/*
343* Send sub( Y ) to where sub( X ) resides, and receive sub( X ) from the same
344* location.
345*/
346 if( YisRow )
347 {
348 TYPE->Cgesd2d( ctxt, 1, N, Mptr( Y, Yii, Yjj, Yld, size ),
349 Yld, XprocR, YmyprocD );
350 TYPE->Cgerv2d( ctxt, 1, N, buf, 1, XprocR, YmyprocD );
351 }
352 else
353 {
354 TYPE->Cgesd2d( ctxt, N, 1, Mptr( Y, Yii, Yjj, Yld, size ),
355 Yld, YmyprocD, XprocR );
356 TYPE->Cgerv2d( ctxt, N, 1, buf, N, YmyprocD, XprocR );
357 }
358 FDOT( &N, DOT, buf, &ione, Mptr( Y, Yii, Yjj, Yld, size ),
359 &Ylinc );
360 if( buf ) free( buf );
361 }
362 }
363 }
364 else
365 {
366/*
367* sub( X ) and sub( Y ) are not both row or column vectors
368*/
369 if( ( XmyprocR == XprocR ) && ( YmyprocR == YprocR ) )
370 {
371/*
372* If I am at the intersection of the process row and column, then compute the
373* dot product and broadcast it in my process row and column.
374*/
375 FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y,
376 Yii, Yjj, Yld, size ), &Ylinc );
377 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
378 TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 );
379 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
380 TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 );
381 }
382 else if( XmyprocR == XprocR )
383 {
384 if( XisRow ) { Xscope = CROW; rsrc = XprocR; csrc = YprocR; }
385 else { Xscope = CCOLUMN; rsrc = YprocR; csrc = XprocR; }
386 top = PB_Ctop( &ctxt, BCAST, &Xscope, TOP_GET );
387 TYPE->Cgebr2d( ctxt, &Xscope, top, 1, 1, DOT, 1, rsrc, csrc );
388 }
389 else if( YmyprocR == YprocR )
390 {
391 if( YisRow ) { Yscope = CROW; rsrc = YprocR; csrc = XprocR; }
392 else { Yscope = CCOLUMN; rsrc = XprocR; csrc = YprocR; }
393 top = PB_Ctop( &ctxt, BCAST, &Yscope, TOP_GET );
394 TYPE->Cgebr2d( ctxt, &Yscope, top, 1, 1, DOT, 1, rsrc, csrc );
395 }
396 }
397 }
398 else
399 {
400/*
401* sub( Y ) is replicated
402*/
403 if( XmyprocR == XprocR )
404 {
405/*
406* If I am in the process row (resp. column) owning sub( X ), then compute the
407* dot product and broadcast in my column (resp. row).
408*/
409 size = TYPE->size;
410
411 FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii,
412 Yjj, Yld, size ), &Ylinc );
413
414 if( XisRow )
415 {
416 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
417 TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 );
418 }
419 else
420 {
421 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
422 TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 );
423 }
424 }
425 else
426 {
427/*
428* Otherwise, receive the dot product
429*/
430 if( XisRow )
431 {
432 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
433 TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, XprocR,
434 XmyprocD );
435 }
436 else
437 {
438 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
439 TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, XmyprocD,
440 XprocR );
441 }
442 }
443 }
444 }
445 else
446 {
447/*
448* sub( X ) is replicated
449*/
450 if( YisR || ( YmyprocR == YprocR ) )
451 {
452/*
453* If I own a piece of sub( Y ), then compute the dot product
454*/
455 size = TYPE->size;
456
457 FDOT( &N, DOT, Mptr( X, Xii, Xjj, Xld, size ), &Xlinc, Mptr( Y, Yii,
458 Yjj, Yld, size ), &Ylinc );
459 }
460
461 if( !YisR )
462 {
463/*
464* If sub( Y ) is not replicated, then broadcast the result to the other
465* processes that own a piece of sub( X ), but were not involved in the above
466* dot-product computation.
467*/
468 if( YisRow )
469 {
470 top = PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
471 if( YmyprocR == YprocR )
472 TYPE->Cgebs2d( ctxt, COLUMN, top, 1, 1, DOT, 1 );
473 else
474 TYPE->Cgebr2d( ctxt, COLUMN, top, 1, 1, DOT, 1, YprocR,
475 YmyprocD );
476 }
477 else
478 {
479 top = PB_Ctop( &ctxt, BCAST, ROW, TOP_GET );
480 if( YmyprocR == YprocR )
481 TYPE->Cgebs2d( ctxt, ROW, top, 1, 1, DOT, 1 );
482 else
483 TYPE->Cgebr2d( ctxt, ROW, top, 1, 1, DOT, 1, YmyprocD, YprocR );
484 }
485 }
486 }
487/*
488* End of PB_CpdotNN
489*/
490}
#define Int
Definition Bconfig.h:22
F_VOID_FCT(* VVDOT_T)()
Definition pblas.h:290
#define CCOLUMN
Definition PBblacs.h:20
#define TOP_GET
Definition PBblacs.h:50
#define COLUMN
Definition PBblacs.h:45
#define CROW
Definition PBblacs.h:21
#define ROW
Definition PBblacs.h:46
void Cblacs_gridinfo()
#define BCAST
Definition PBblacs.h:48
#define CTXT_
Definition PBtools.h:38
char * PB_Cmalloc()
void PB_Cinfog2l()
#define Mptr(a_, i_, j_, lda_, siz_)
Definition PBtools.h:132
#define LLD_
Definition PBtools.h:47
char * PB_Ctop()
void PB_CpdotNN()
#define M_
Definition PBtools.h:39
#define TYPE
Definition clamov.c:7