ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzaxpy.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * Mark R. Fahey
4 * August 2000
5 * This is a slightly modified version of pzaxpy_ from ScaLAPACK 1.0
6 * which fixes a bug in the incx=1 and incy=1 case.
7 *
8 * ---------------------------------------------------------------------
9 */
10 /*
11 * Include files
12 */
13 #include "pblas.h"
14 
15 void pzaxpy_( n, alpha, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y,
16  incy )
17 /*
18 * .. Scalar Arguments ..
19 */
20  int * incx, * incy, * ix, * iy, * jx, * jy, * n;
21  complex16 * alpha;
22 /* ..
23 * .. Array Arguments ..
24 */
25  int desc_X[], desc_Y[];
26  complex16 X[], Y[];
27 {
28 /*
29 * Purpose
30 * =======
31 *
32 * PZAXPY adds one distributed vector to another,
33 *
34 * sub( Y ) := sub( Y ) + alpha * sub( X )
35 *
36 * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
37 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
38 *
39 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
40 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y.
41 *
42 * Notes
43 * =====
44 *
45 * Each global data object is described by an associated description
46 * vector. This vector stores the information required to establish
47 * the mapping between an object element and its corresponding process
48 * and memory location.
49 *
50 * Let A be a generic term for any 2D block cyclicly distributed array.
51 * Such a global array has an associated description vector descA.
52 * In the following comments, the character _ should be read as
53 * "of the global array".
54 *
55 * NOTATION STORED IN EXPLANATION
56 * --------------- -------------- --------------------------------------
57 * DT_A (global) descA[ DT_ ] The descriptor type. In this case,
58 * DT_A = 1.
59 * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
60 * the BLACS process grid A is distribu-
61 * ted over. The context itself is glo-
62 * bal, but the handle (the integer
63 * value) may vary.
64 * M_A (global) descA[ M_ ] The number of rows in the global
65 * array A.
66 * N_A (global) descA[ N_ ] The number of columns in the global
67 * array A.
68 * MB_A (global) descA[ MB_ ] The blocking factor used to distribu-
69 * te the rows of the array.
70 * NB_A (global) descA[ NB_ ] The blocking factor used to distribu-
71 * te the columns of the array.
72 * RSRC_A (global) descA[ RSRC_ ] The process row over which the first
73 * row of the array A is distributed.
74 * CSRC_A (global) descA[ CSRC_ ] The process column over which the
75 * first column of the array A is
76 * distributed.
77 * LLD_A (local) descA[ LLD_ ] The leading dimension of the local
78 * array. LLD_A >= MAX(1,LOCr(M_A)).
79 *
80 * Let K be the number of rows or columns of a distributed matrix,
81 * and assume that its process grid has dimension p x q.
82 * LOCr( K ) denotes the number of elements of K that a process
83 * would receive if K were distributed over the p processes of its
84 * process column.
85 * Similarly, LOCc( K ) denotes the number of elements of K that a
86 * process would receive if K were distributed over the q processes of
87 * its process row.
88 * The values of LOCr() and LOCc() may be determined via a call to the
89 * ScaLAPACK tool function, NUMROC:
90 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
91 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
92 * An upper bound for these quantities may be computed by:
93 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
94 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
95 *
96 * Because vectors may be seen as particular matrices, a distributed
97 * vector is considered to be a distributed matrix.
98 *
99 * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
100 * process column having the first entries of sub( Y ) must also contain
101 * the first entries of sub( X ). Moreover, the quantity
102 * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
103 *
104 * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
105 * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
106 * MOD( IY-1, MB_Y ).
107 *
108 * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
109 * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
110 * MOD( JY-1, NB_Y ).
111 *
112 * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
113 * equal to MB_Y, and the process row having the first entries of
114 * sub( Y ) must also contain the first entries of sub( X ). Moreover,
115 * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
116 *
117 * Parameters
118 * ==========
119 *
120 * N (global input) pointer to INTEGER.
121 * The length of the distributed vectors to be added. N >= 0.
122 *
123 * ALPHA (global input) pointer to COMPLEX*16
124 * The scalar used to multiply each component of sub( X ).
125 *
126 * X (local input) COMPLEX*16 array containing the local
127 * pieces of a distributed matrix of dimension of at least
128 * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
129 * This array contains the entries of the distributed vector
130 * sub( X ).
131 *
132 * IX (global input) pointer to INTEGER
133 * The global row index of the submatrix of the distributed
134 * matrix X to operate on.
135 *
136 * JX (global input) pointer to INTEGER
137 * The global column index of the submatrix of the distributed
138 * matrix X to operate on.
139 *
140 * DESCX (global and local input) INTEGER array of dimension 8.
141 * The array descriptor of the distributed matrix X.
142 *
143 * INCX (global input) pointer to INTEGER
144 * The global increment for the elements of X. Only two values
145 * of INCX are supported in this version, namely 1 and M_X.
146 *
147 * Y (local input/local output) COMPLEX*16 array
148 * containing the local pieces of a distributed matrix of
149 * dimension of at least
150 * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
151 * This array contains the entries of the distributed vector
152 * sub( Y ).
153 * On exit sub( Y ) is overwritten by sub( Y ) + alpha*sub( X ).
154 *
155 * IY (global input) pointer to INTEGER
156 * The global row index of the submatrix of the distributed
157 * matrix Y to operate on.
158 *
159 * JY (global input) pointer to INTEGER
160 * The global column index of the submatrix of the distributed
161 * matrix Y to operate on.
162 *
163 * DESCY (global and local input) INTEGER array of dimension 8.
164 * The array descriptor of the distributed matrix Y.
165 *
166 * INCY (global input) pointer to INTEGER
167 * The global increment for the elements of Y. Only two values
168 * of INCY are supported in this version, namely 1 and M_Y.
169 *
170 * =====================================================================
171 *
172 * .. Local Scalars ..
173 */
174  int ictxt, info, iix, iiy, ixcol, ixrow, iycol, iyrow, jjx,
175  jjy, lcm, lcmp, lcmq, mycol, myrow, nn, np, np0, nprow,
176  npcol, nq, nq0, nz, ione=1, tmp1, wksz;
177  complex16 one, tmp, zero;
178 /* ..
179 * .. PBLAS Buffer ..
180 */
181  complex16 * buff;
182 /* ..
183 * .. External Functions ..
184 */
185  void blacs_gridinfo_();
186  void zgerv2d_();
187  void zgesd2d_();
188  void pbchkvect();
189  void pberror_();
190  char * getpbbuf();
191  F_VOID_FCT zaxpy_();
192  F_VOID_FCT zcopy_();
193  F_VOID_FCT pbztrnv_();
194  F_INTG_FCT ilcm_();
195  F_INTG_FCT numroc_();
196 /* ..
197 * .. Executable Statements ..
198 *
199 * Get grid parameters
200 */
201  ictxt = desc_X[CTXT_];
202  blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
203 /*
204 * Test the input parameters
205 */
206  info = 0;
207  if( nprow == -1 )
208  info = -(600+CTXT_+1);
209  else
210  {
211  pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 6, &iix, &jjx,
212  &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
213  pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 11, &iiy, &jjy,
214  &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );
215 
216  if( info == 0 )
217  {
218  if( *n != 1 )
219  {
220  if( *incx == desc_X[M_] )
221  { /* X is distributed along a process row */
222  if( *incy == desc_Y[M_] )
223  { /* Y is distributed over a process row */
224  if( ( ixcol != iycol ) ||
225  ( ( (*jx-1) % desc_X[NB_] ) !=
226  ( (*jy-1) % desc_Y[NB_] ) ) )
227  info = -10;
228  else if( desc_Y[NB_] != desc_X[NB_] )
229  info = -(1100+NB_+1);
230  }
231  else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
232  { /* Y is distributed over a process column */
233  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
234  info = -9;
235  else if( desc_Y[MB_] != desc_X[NB_] )
236  info = -(1100+MB_+1);
237  }
238  else
239  {
240  info = -12;
241  }
242  }
243  else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
244  { /* X is distributed along a process column */
245  if( *incy == desc_Y[M_] )
246  { /* Y is distributed over a process row */
247  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
248  info = -10;
249  else if( desc_Y[NB_] != desc_X[MB_] )
250  info = -(1100+NB_+1);
251  }
252  else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
253  { /* Y is distributed over a process column */
254  if( ( ixrow != iyrow ) ||
255  ( ( (*ix-1) % desc_X[MB_] ) !=
256  ( (*iy-1) % desc_Y[MB_] ) ) )
257  info = -9;
258  else if( desc_Y[MB_] != desc_X[MB_] )
259  info = -(1100+MB_+1);
260  }
261  else
262  {
263  info = -12;
264  }
265  }
266  else
267  {
268  info = -7;
269  }
270  }
271  if( ictxt != desc_Y[CTXT_] )
272  info = -(1100+CTXT_+1);
273  }
274  }
275  if( info )
276  {
277  pberror_( &ictxt, "PZAXPY", &info );
278  return;
279  }
280 /*
281 * Quick return if possible.
282 */
283  if( *n == 0 )
284  return;
285 /*
286 * y <- y + alpha * x
287 */
288  if( *n == 1 )
289  {
290  if( ( myrow == iyrow ) && ( mycol == iycol ) )
291  {
292  if( ( myrow != ixrow ) || ( mycol != ixcol ) )
293  zgerv2d_( &ictxt, n, n, &tmp, n, &ixrow, &ixcol );
294  else
295  tmp = X[iix-1+(jjx-1)*desc_X[LLD_]];
296  zaxpy_( n, alpha, &tmp, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n );
297  }
298  else if( ( myrow == ixrow ) && ( mycol == ixcol ) )
299  zgesd2d_( &ictxt, n, n, &X[iix-1+(jjx-1)*desc_X[LLD_]], n,
300  &iyrow, &iycol );
301  return;
302  }
303 
304  one.re = ONE; one.im = ZERO;
305  zero.re = ZERO; zero.im = ZERO;
306  if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
307  { /* X and Y are both distributed over a process row */
308  nz = (*jx-1) % desc_Y[NB_];
309  nn = *n + nz;
310  nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
311  if( mycol == ixcol )
312  nq -= nz;
313  if( ixrow == iyrow )
314  {
315  if( myrow == ixrow )
316  zaxpy_( &nq, alpha,
317  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
318  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
319  }
320  else
321  {
322  if( myrow == ixrow )
323  zgesd2d_( &ictxt, &ione, &nq,
324  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
325  &iyrow, &mycol );
326  else if( myrow == iyrow )
327  {
328  buff = (complex16 *)getpbbuf( "PZAXPY", nq*sizeof(complex16) );
329  zgerv2d_( &ictxt, &nq, &ione, buff, &ione, &ixrow,
330  &mycol );
331  zaxpy_( &nq, alpha, buff, &ione,
332  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_] );
333  }
334  }
335  }
336  else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
337  ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
338  { /* X and Y are both distributed over a process column */
339  nz = (*ix-1) % desc_X[MB_];
340  nn = *n + nz;
341  np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
342  if( myrow == ixrow )
343  np -= nz;
344  if( ixcol == iycol )
345  {
346  if( mycol == ixcol )
347  zaxpy_( &np, alpha,
348  &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
349  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
350  }
351  else
352  {
353  if( mycol == ixcol )
354  zgesd2d_( &ictxt, &np, &ione,
355  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
356  &myrow, &iycol );
357  else if( mycol == iycol )
358  {
359  buff = (complex16 *)getpbbuf( "PZAXPY", np*sizeof(complex16) );
360  zgerv2d_( &ictxt, &np, &ione, buff, &ione, &myrow,
361  &ixcol );
362  zaxpy_( &np, alpha, buff, &ione,
363  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
364  }
365  }
366  }
367  else /* X and Y are not distributed along the same direction */
368  {
369  lcm = ilcm_( &nprow, &npcol );
370  if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
371  { /* X is distributed over a process column */
372  lcmq = lcm / npcol;
373  nz = (*ix-1) % desc_X[MB_];
374  nn = *n + nz;
375  np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
376  nz = (*jy-1) % desc_Y[NB_];
377  nn = *n + nz;
378  tmp1 = nn / desc_Y[NB_];
379  nq0 = MYROC0( tmp1, nn, desc_Y[NB_], npcol );
380  tmp1 = nq0 / desc_Y[NB_];
381  wksz = np + MYROC0( tmp1, nq0, desc_Y[NB_], lcmq );
382 
383  buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) );
384 
385  if( myrow == ixrow )
386  np -= nz;
387 
388  if( mycol == ixcol )
389  {
390  zcopy_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
391  buff, incx );
392  zscal_( &np, alpha, buff, incx );
393  }
394  pbztrnv_( &ictxt, C2F_CHAR( "C" ), C2F_CHAR( "T" ), n,
395  &desc_X[MB_], &nz, buff, incx, &one,
396  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
397  &ixrow, &ixcol, &iyrow, &iycol, buff+np );
398  }
399  else /* Y is distributed over a process column */
400  {
401  lcmp = lcm / nprow;
402  nz = (*iy-1) % desc_Y[MB_];
403  nn = *n + nz;
404  tmp1 = nn / desc_Y[MB_];
405  np = numroc_( &nn, &desc_Y[MB_], &myrow, &iyrow, &nprow );
406  np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
407  tmp1 = np0 / desc_Y[MB_];
408  wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
409  wksz = np + wksz;
410 
411  buff = (complex16 *)getpbbuf( "PZAXPY", wksz*sizeof(complex16) );
412 
413  pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
414  &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
415  &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
416  &iyrow, &iycol, buff+np );
417  if( mycol == iycol )
418  {
419  if( myrow == iyrow )
420  np -= nz;
421  zaxpy_( &np, alpha, buff, &ione,
422  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy );
423  }
424  }
425  }
426 }
M_
#define M_
Definition: PBtools.h:39
getpbbuf
char * getpbbuf(char *mess, int length)
Definition: getpbbuf.c:3
MB_
#define MB_
Definition: PBtools.h:43
zgerv2d_
F_VOID_FUNC zgerv2d_(int *ConTxt, int *m, int *n, double *A, int *lda, int *rsrc, int *csrc)
Definition: zgerv2d_.c:6
complex16
Definition: pblas.h:93
NB_
#define NB_
Definition: PBtools.h:44
zscal_
F_VOID_FCT zscal_()
zgesd2d_
F_VOID_FUNC zgesd2d_(int *ConTxt, int *m, int *n, double *A, int *lda, int *rdest, int *cdest)
Definition: zgesd2d_.c:7
complex16::re
double re
Definition: pblas.h:93
F_VOID_FCT
#define F_VOID_FCT
Definition: pblas.h:123
LLD_
#define LLD_
Definition: PBtools.h:47
zcopy_
F_VOID_FCT zcopy_()
ZERO
#define ZERO
Definition: PBtools.h:66
blacs_gridinfo_
F_VOID_FUNC blacs_gridinfo_(int *ConTxt, int *nprow, int *npcol, int *myrow, int *mycol)
Definition: blacs_info_.c:6
complex16::im
double im
Definition: pblas.h:93
pbchkvect
void pbchkvect(int n, int npos0, int ix, int jx, desc_X, int incx, int dpos0, int *iix, int *jjx, int *ixrow, int *ixcol, int nprow, int npcol, int myrow, int mycol, int *info)
Definition: pbchkvect.c:15
pblas.h
ONE
#define ONE
Definition: PBtools.h:64
zaxpy_
F_VOID_FCT zaxpy_()
F_INTG_FCT
#define F_INTG_FCT
Definition: pblas.h:124
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
MYROC0
#define MYROC0(nblocks, n, nb, nprocs)
Definition: pblas.h:191
CTXT_
#define CTXT_
Definition: PBtools.h:38
pzaxpy_
void pzaxpy_(int *n, complex16 *alpha, X, int *ix, int *jx, desc_X, int *incx, Y, int *iy, int *jy, desc_Y, int *incy)
Definition: pzaxpy.c:15