SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzaxpy.c
Go to the documentation of this file.
1/* ---------------------------------------------------------------------
2*
3* Mark R. Fahey
4* August 2000
5* This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0
6* which fixes a bug in the incx=1 and incy=1 case.
7*
8* ---------------------------------------------------------------------
9*/
10/*
11* Include files
12*/
13#include "pblas.h"
14
15void pzaxpy_( n, alpha, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y,
16 incy )
17/*
18* .. Scalar Arguments ..
19*/
20 Int * incx, * incy, * ix, * iy, * jx, * jy, * n;
21 complex16 * alpha;
22/* ..
23* .. Array Arguments ..
24*/
25 Int desc_X[], desc_Y[];
26 complex16 X[], Y[];
27{
28/*
29* Purpose
30* =======
31*
32* PZAXPY adds one distributed vector to another,
33*
34* sub( Y ) := sub( Y ) + alpha * sub( X )
35*
36* where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
37* X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
38*
39* sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
40* Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
41*
42* Notes
43* =====
44*
45* Each global data object is described by an associated description
46* vector. This vector stores the information required to establish
47* the mapping between an object element and its corresponding process
48* and memory location.
49*
50* Let A be a generic term for any 2D block cyclicly distributed array.
51* Such a global array has an associated description vector descA.
52* In the following comments, the character _ should be read as
53* "of the global array".
54*
55* NOTATION STORED IN EXPLANATION
56* --------------- -------------- --------------------------------------
57* DT_A (global) descA[ DT_ ] The descriptor type. In this case,
58* DT_A = 1.
59* CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
60* the BLACS process grid A is distribu-
61* ted over. The context itself is glo-
62* bal, but the handle (the integer
63* value) may vary.
64* M_A (global) descA[ M_ ] The number of rows in the global
65* array A.
66* N_A (global) descA[ N_ ] The number of columns in the global
67* array A.
68* MB_A (global) descA[ MB_ ] The blocking factor used to distribu-
69* te the rows of the array.
70* NB_A (global) descA[ NB_ ] The blocking factor used to distribu-
71* te the columns of the array.
72* RSRC_A (global) descA[ RSRC_ ] The process row over which the first
73* row of the array A is distributed.
74* CSRC_A (global) descA[ CSRC_ ] The process column over which the
75* first column of the array A is
76* distributed.
77* LLD_A (local) descA[ LLD_ ] The leading dimension of the local
78* array. LLD_A >= MAX(1,LOCr(M_A)).
79*
80* Let K be the number of rows or columns of a distributed matrix,
81* and assume that its process grid has dimension p x q.
82* LOCr( K ) denotes the number of elements of K that a process
83* would receive if K were distributed over the p processes of its
84* process column.
85* Similarly, LOCc( K ) denotes the number of elements of K that a
86* process would receive if K were distributed over the q processes of
87* its process row.
88* The values of LOCr() and LOCc() may be determined via a call to the
89* ScaLAPACK tool function, NUMROC:
90* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
91* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
92* An upper bound for these quantities may be computed by:
93* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
94* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
95*
96* Because vectors may be seen as particular matrices, a distributed
97* vector is considered to be a distributed matrix.
98*
99* If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
100* process column having the first entries of sub( Y ) must also contain
101* the first entries of sub( X ). Moreover, the quantity
102* MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
103*
104* If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
105* Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
106* MOD( IY-1, MB_Y ).
107*
108* If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
109* Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
110* MOD( JY-1, NB_Y ).
111*
112* If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
113* equal to MB_Y, and the process row having the first entries of
114* sub( Y ) must also contain the first entries of sub( X ). Moreover,
115* the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
116*
117* Parameters
118* ==========
119*
120* N (global input) pointer to INTEGER.
121* The length of the distributed vectors to be added. N >= 0.
122*
123* ALPHA (global input) pointer to COMPLEX*16
124* The scalar used to multiply each component of sub( X ).
125*
126* X (local input) COMPLEX*16 array containing the local
127* pieces of a distributed matrix of dimension of at least
128* ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
129* This array contains the entries of the distributed vector
130* sub( X ).
131*
132* IX (global input) pointer to INTEGER
133* The global row index of the submatrix of the distributed
134* matrix X to operate on.
135*
136* JX (global input) pointer to INTEGER
137* The global column index of the submatrix of the distributed
138* matrix X to operate on.
139*
140* DESCX (global and local input) INTEGER array of dimension 8.
141* The array descriptor of the distributed matrix X.
142*
143* INCX (global input) pointer to INTEGER
144* The global increment for the elements of X. Only two values
145* of INCX are supported in this version, namely 1 and M_X.
146*
147* Y (local input/local output) COMPLEX*16 array
148* containing the local pieces of a distributed matrix of
149* dimension of at least
150* ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
151* This array contains the entries of the distributed vector
152* sub( Y ).
153* On exit sub( Y ) is overwritten by sub( Y ) + alpha*sub( X ).
154*
155* IY (global input) pointer to INTEGER
156* The global row index of the submatrix of the distributed
157* matrix Y to operate on.
158*
159* JY (global input) pointer to INTEGER
160* The global column index of the submatrix of the distributed
161* matrix Y to operate on.
162*
163* DESCY (global and local input) INTEGER array of dimension 8.
164* The array descriptor of the distributed matrix Y.
165*
166* INCY (global input) pointer to INTEGER
167* The global increment for the elements of Y. Only two values
168* of INCY are supported in this version, namely 1 and M_Y.
169*
170* =====================================================================
171*
172* .. Local Scalars ..
173*/
174 Int ictxt, info, iix, iiy, ixcol, ixrow, iycol, iyrow, jjx,
175 jjy, lcm, lcmp, lcmq, mycol, myrow, nn, np, np0, nprow,
176 npcol, nq, nq0, nz, ione=1, tmp1, wksz;
177 complex16 one, tmp, zero;
178/* ..
179* .. PBLAS Buffer ..
180*/
181 complex16 * buff;
182/* ..
183* .. External Functions ..
184*/
185 void blacs_gridinfo_();
186 void zgerv2d_();
187 void zgesd2d_();
188 void pbchkvect();
189 void pberror_();
190 char * getpbbuf();
193 F_VOID_FCT pbztrnv_();
194 F_INTG_FCT ilcm_();
195 F_INTG_FCT numroc_();
196/* ..
197* .. Executable Statements ..
198*
199* Get grid parameters
200*/
201 ictxt = desc_X[CTXT_];
202 blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
203/*
204* Test the input parameters
205*/
206 info = 0;
207 if( nprow == -1 )
208 info = -(600+CTXT_+1);
209 else
210 {
211 pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx,
212 &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
213 pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy,
214 &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );
215
216 if( info == 0 )
217 {
218 if( *n != 1 )
219 {
220 if( *incx == desc_X[M_] )
221 { /* X is distributed along a process row */
222 if( *incy == desc_Y[M_] )
223 { /* Y is distributed over a process row */
224 if( ( ixcol != iycol ) ||
225 ( ( (*jx-1) % desc_X[NB_] ) !=
226 ( (*jy-1) % desc_Y[NB_] ) ) )
227 info = -10;
228 else if( desc_Y[NB_] != desc_X[NB_] )
229 info = -(1100+NB_+1);
230 }
231 else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
232 { /* Y is distributed over a process column */
233 if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
234 info = -9;
235 else if( desc_Y[MB_] != desc_X[NB_] )
236 info = -(1100+MB_+1);
237 }
238 else
239 {
240 info = -12;
241 }
242 }
243 else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
244 { /* X is distributed along a process column */
245 if( *incy == desc_Y[M_] )
246 { /* Y is distributed over a process row */
247 if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
248 info = -10;
249 else if( desc_Y[NB_] != desc_X[MB_] )
250 info = -(1100+NB_+1);
251 }
252 else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
253 { /* Y is distributed over a process column */
254 if( ( ixrow != iyrow ) ||
255 ( ( (*ix-1) % desc_X[MB_] ) !=
256 ( (*iy-1) % desc_Y[MB_] ) ) )
257 info = -9;
258 else if( desc_Y[MB_] != desc_X[MB_] )
259 info = -(1100+MB_+1);
260 }
261 else
262 {
263 info = -12;
264 }
265 }
266 else
267 {
268 info = -7;
269 }
270 }
271 if( ictxt != desc_Y[CTXT_] )
272 info = -(1100+CTXT_+1);
273 }
274 }
275 if( info )
276 {
277 pberror_( &ictxt, "PZAXPY", &info );
278 return;
279 }
280/*
281* Quick return if possible.
282*/
283 if( *n == 0 )
284 return;
285/*
286* y <- y + alpha * x
287*/
288 if( *n == 1 )
289 {
290 if( ( myrow == iyrow ) && ( mycol == iycol ) )
291 {
292 if( ( myrow != ixrow ) || ( mycol != ixcol ) )
293 zgerv2d_( &ictxt, n, n, &tmp, n, &ixrow, &ixcol );
294 else
295 tmp = X[iix-1+(jjx-1)*desc_X[LLD_]];
296 zaxpy_( n, alpha, &tmp, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n );
297 }
298 else if( ( myrow == ixrow ) && ( mycol == ixcol ) )
299 zgesd2d_( &ictxt, n, n, &X[iix-1+(jjx-1)*desc_X[LLD_]], n,
300 &iyrow, &iycol );
301 return;
302 }
303
304 one.re = ONE; one.im = ZERO;
305 zero.re = ZERO; zero.im = ZERO;
306 if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
307 { /* X and Y are both distributed over a process row */
308 nz = (*jx-1) % desc_Y[NB_];
309 nn = *n + nz;
310 nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
311 if( mycol == ixcol )
312 nq -= nz;
313 if( ixrow == iyrow )
314 {
315 if( myrow == ixrow )
316 zaxpy_( &nq, alpha,
317 &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
318 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
319 }
320 else
321 {
322 if( myrow == ixrow )
323 zgesd2d_( &ictxt, &ione, &nq,
324 &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
325 &iyrow, &mycol );
326 else if( myrow == iyrow )
327 {
328 buff = (complex16 *)getpbbuf( "PZAXPY", nq*sizeof(complex16) );
329 zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow,
330 &mycol );
331 zaxpy_( &nq, alpha, buff, &ione,
332 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
333 }
334 }
335 }
336 else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
337 ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
338 { /* X and Y are both distributed over a process column */
339 nz = (*ix-1) % desc_X[MB_];
340 nn = *n + nz;
341 np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
342 if( myrow == ixrow )
343 np -= nz;
344 if( ixcol == iycol )
345 {
346 if( mycol == ixcol )
347 zaxpy_( &np, alpha,
348 &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
349 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
350 }
351 else
352 {
353 if( mycol == ixcol )
354 zgesd2d_( &ictxt, &np, &ione,
355 &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
356 &myrow, &iycol );
357 else if( mycol == iycol )
358 {
359 buff = (complex16 *)getpbbuf( "PZAXPY", np*sizeof(complex16) );
360 zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow,
361 &ixcol );
362 zaxpy_( &np, alpha, buff, &ione,
363 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
364 }
365 }
366 }
367 else /* X and Y are not distributed along the same direction */
368 {
369 lcm = ilcm_( &nprow, &npcol );
370 if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
371 { /* X is distributed over a process column */
372 lcmq = lcm / npcol;
373 nz = (*ix-1) % desc_X[MB_];
374 nn = *n + nz;
375 np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
376 nz = (*jy-1) % desc_Y[NB_];
377 nn = *n + nz;
378 tmp1 = nn / desc_Y[NB_];
379 nq0 = MYROC0( tmp1, nn, desc_Y[NB_], npcol );
380 tmp1 = nq0 / desc_Y[NB_];
381 wksz = np + MYROC0( tmp1, nq0, desc_Y[NB_], lcmq );
382
383 buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) );
384
385 if( myrow == ixrow )
386 np -= nz;
387
388 if( mycol == ixcol )
389 {
390 zcopy_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
391 buff, incx );
392 zscal_( &np, alpha, buff, incx );
393 }
394 pbztrnv_( &ictxt, C2F_CHAR( "C" ), C2F_CHAR( "T" ), n,
395 &desc_X[MB_], &nz, buff, incx, &one,
396 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
397 &ixrow, &ixcol, &iyrow, &iycol, buff+np );
398 }
399 else /* Y is distributed over a process column */
400 {
401 lcmp = lcm / nprow;
402 nz = (*iy-1) % desc_Y[MB_];
403 nn = *n + nz;
404 tmp1 = nn / desc_Y[MB_];
405 np = numroc_( &nn, &desc_Y[MB_], &myrow, &iyrow, &nprow );
406 np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
407 tmp1 = np0 / desc_Y[MB_];
408 wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
409 wksz = np + wksz;
410
411 buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) );
412
413 pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
414 &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
415 &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
416 &iyrow, &iycol, buff+np );
417 if( mycol == iycol )
418 {
419 if( myrow == iyrow )
420 np -= nz;
421 zaxpy_( &np, alpha, buff, &ione,
422 &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
423 }
424 }
425 }
426}
#define Int
Definition Bconfig.h:22
#define F_VOID_FCT
Definition pblas.h:127
#define F_INTG_FCT
Definition pblas.h:128
#define C2F_CHAR(a)
Definition pblas.h:125
#define zaxpy_
Definition PBblas.h:108
#define zcopy_
Definition PBblas.h:103
#define zscal_
Definition PBblas.h:113
#define pzaxpy_
Definition PBpblas.h:63
#define CTXT_
Definition PBtools.h:38
#define MB_
Definition PBtools.h:43
#define ONE
Definition PBtools.h:64
#define LLD_
Definition PBtools.h:47
#define M_
Definition PBtools.h:39
#define ZERO
Definition PBtools.h:66
#define NB_
Definition PBtools.h:44
#define MYROC0(nblocks, n, nb, nprocs)
Definition pblas.h:195
F_VOID_FUNC blacs_gridinfo_(Int *ConTxt, Int *nprow, Int *npcol, Int *myrow, Int *mycol)
Definition blacs_info_.c:6
char * getpbbuf(char *mess, Int length)
Definition getpbbuf.c:3
void pbchkvect(Int n, Int npos0, Int ix, Int jx, Int desc_X[], Int incx, Int dpos0, Int *iix, Int *jjx, Int *ixrow, Int *ixcol, Int nprow, Int npcol, Int myrow, Int mycol, Int *info)
Definition pbchkvect.c:15
double re
Definition pblas.h:97
double im
Definition pblas.h:97
F_VOID_FUNC zgerv2d_(Int *ConTxt, Int *m, Int *n, double *A, Int *lda, Int *rsrc, Int *csrc)
Definition zgerv2d_.c:6
F_VOID_FUNC zgesd2d_(Int *ConTxt, Int *m, Int *n, double *A, Int *lda, Int *rdest, Int *cdest)
Definition zgesd2d_.c:7