ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzrot.c
Go to the documentation of this file.
1 /* ---------------------------------------------------------------------
2 *
3 * -- Mark R. Fahey
4 * June 28, 2000
5 *
6 * ---------------------------------------------------------------------
7 */
8 /*
9 * Include files
10 */
11 #include "pblas.h"
12 
13 void pzrot_( n, X, ix, jx, desc_X, incx, Y, iy, jy, desc_Y, incy, c, s )
14 /*
15 * Mark Fahey
16 * June 22, 2000
17 */
18 /*
19 * .. Scalar Arguments ..
20 */
21  int * incx, * incy, * ix, * iy, * jx, * jy, * n;
22  double * c;
23  complex16 * s;
24 /*
25 * ..
26 * .. Array Arguments ..
27 */
28  int desc_X[], desc_Y[];
29  complex16 X[], Y[];
30 {
31 /*
32 * Purpose
33 * =======
34 *
35 * PZROT applies a plane rotation, where the cos (C) is real and the
36 * sin (S) is complex, and the vectors CX and CY are complex, i.e.,
37 *
38 * [ sub( X ) ] := [ C S ] [ sub( X ) ]
39 * [ sub( Y ) ] := [ -conjg(S) C ] [ sub( Y ) ]
40 *
41 * where sub( X ) denotes X(IX,JX:JX+N-1) if INCX = M_X,
42 * X(IX:IX+N-1,JX) if INCX = 1 and INCX <> M_X,
43 *
44 * sub( Y ) denotes Y(IY,JY:JY+N-1) if INCY = M_Y,
45 * Y(IY:IY+N-1,JY) if INCY = 1 and INCY <> M_Y,
46 *
47 * and where C*C + S*CONJG(S) = 1.0.
48 *
49 * Notes
50 * =====
51 *
52 * Each global data object is described by an associated description
53 * vector. This vector stores the information required to establish
54 * the mapping between an object element and its corresponding process
55 * and memory location.
56 *
57 * Let A be a generic term for any 2D block cyclicly distributed array.
58 * Such a global array has an associated description vector DESCA.
59 * In the following comments, the character _ should be read as
60 * "of the global array".
61 *
62 * NOTATION STORED IN EXPLANATION
63 * --------------- -------------- --------------------------------------
64 * DT_A (global) descA[ DT_ ] The descriptor type. In this case,
65 * DT_A = 1.
66 * CTXT_A (global) descA[ CTXT_ ] The BLACS context handle, indicating
67 * the BLACS process grid A is distribu-
68 * ted over. The context itself is glo-
69 * bal, but the handle (the integer
70 * value) may vary.
71 * M_A (global) descA[ M_ ] The number of rows in the global
72 * array A.
73 * N_A (global) descA[ N_ ] The number of columns in the global
74 * array A.
75 * MB_A (global) descA[ MB_ ] The blocking factor used to distribu-
76 * te the rows of the array.
77 * NB_A (global) descA[ NB_ ] The blocking factor used to distribu-
78 * te the columns of the array.
79 * RSRC_A (global) descA[ RSRC_ ] The process row over which the first
80 * row of the array A is distributed.
81 * CSRC_A (global) descA[ CSRC_ ] The process column over which the
82 * first column of the array A is
83 * distributed.
84 * LLD_A (local) descA[ LLD_ ] The leading dimension of the local
85 * array. LLD_A >= MAX(1,LOCr(M_A)).
86 *
87 * Let K be the number of rows or columns of a distributed matrix,
88 * and assume that its process grid has dimension p x q.
89 * LOCr( K ) denotes the number of elements of K that a process
90 * would receive if K were distributed over the p processes of its
91 * process column.
92 * Similarly, LOCc( K ) denotes the number of elements of K that a
93 * process would receive if K were distributed over the q processes of
94 * its process row.
95 * The values of LOCr() and LOCc() may be determined via a call to the
96 * ScaLAPACK tool function, NUMROC:
97 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
98 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
99 * An upper bound for these quantities may be computed by:
100 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
101 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
102 *
103 * Because vectors may be seen as particular matrices, a distributed
104 * vector is considered to be a distributed matrix.
105 *
106 * If INCX = M_X and INCY = M_Y, NB_X must be equal to NB_Y, and the
107 * process column having the first entries of sub( Y ) must also contain
108 * the first entries of sub( X ). Moreover, the quantity
109 * MOD( JX-1, NB_X ) must be equal to MOD( JY-1, NB_Y ).
110 *
111 * If INCX = M_X, INCY = 1 and INCY <> M_Y, NB_X must be equal to MB_Y.
112 * Moreover, the quantity MOD( JX-1, NB_X ) must be equal to
113 * MOD( IY-1, MB_Y ).
114 *
115 * If INCX = 1, INCX <> M_X and INCY = M_Y, MB_X must be equal to NB_Y.
116 * Moreover, the quantity MOD( IX-1, MB_X ) must be equal to
117 * MOD( JY-1, NB_Y ).
118 *
119 * If INCX = 1, INCX <> M_X, INCY = 1 and INCY <> M_Y, MB_X must be
120 * equal to MB_Y, and the process row having the first entries of
121 * sub( Y ) must also contain the first entries of sub( X ). Moreover,
122 * the quantity MOD( IX-1, MB_X ) must be equal to MOD( IY-1, MB_Y ).
123 *
124 * Arguments
125 * =========
126 *
127 * N (input) INTEGER
128 * The number of elements in the vectors CX and CY.
129 *
130 * X (local input) COMPLEX array containing the local
131 * pieces of a distributed matrix of dimension of at least
132 * ( (JX-1)*M_X + IX + ( N - 1 )*abs( INCX ) )
133 * This array contains the entries of the distributed vector
134 * sub( X ).
135 * On output, CX is overwritten with C*X + S*Y.
136 *
137 * IX (global input) pointer to INTEGER
138 * The global row index of the submatrix of the distributed
139 * matrix X to operate on.
140 *
141 * JX (global input) pointer to INTEGER
142 * The global column index of the submatrix of the distributed
143 * matrix X to operate on.
144 *
145 * DESCX (global and local input) INTEGER array of dimension 8.
146 * The array descriptor of the distributed matrix X.
147 *
148 * INCX (global input) pointer to INTEGER
149 * The global increment for the elements of X. Only two values
150 * of INCX are supported in this version, namely 1 and M_X.
151 *
152 * Y (local input) COMPLEX array containing the local
153 * pieces of a distributed matrix of dimension of at least
154 * ( (JY-1)*M_Y + IY + ( N - 1 )*abs( INCY ) )
155 * This array contains the entries of the distributed vector
156 * sub( Y ).
157 * On output, CY is overwritten with -CONJG(S)*X + C*Y.
158 *
159 * IY (global input) pointer to INTEGER
160 * The global row index of the submatrix of the distributed
161 * matrix Y to operate on.
162 *
163 * JY (global input) pointer to INTEGER
164 * The global column index of the submatrix of the distributed
165 * matrix Y to operate on.
166 *
167 * DESCY (global and local input) INTEGER array of dimension 8.
168 * The array descriptor of the distributed matrix Y.
169 *
170 * INCY (global input) pointer to INTEGER
171 * The global increment for the elements of Y. Only two values
172 * of INCY are supported in this version, namely 1 and M_Y.
173 *
174 * C (input) pointer to DOUBLE
175 * S (input) pointer COMPLEX
176 * C and S define a rotation
177 * [ C S ]
178 * [ -conjg(S) C ]
179 * where C*C + S*CONJG(S) = 1.0.
180 *
181 * =====================================================================
182 *
183 * .. Local Scalars ..
184 */
185  int ictxt, iix, iiy, info, ixcol, ixrow, iycol, iyrow, jjx,
186  jjy, lcm, lcmp, mycol, myrow, nn, np, np0,
187  nprow, npcol, nq, nz, ione=1, tmp1, wksz;
188  complex16 xwork[1], ywork[1], zero;
189 /* ..
190 * .. PBLAS Buffer ..
191 */
192  complex16 * buff;
193 /* ..
194 * .. External Functions ..
195 */
196  void blacs_gridinfo_();
197  void zgerv2d_();
198  void zgesd2d_();
199  void pbchkvect();
200  void PB_Cabort();
201  char * getpbbuf();
202  F_INTG_FCT pbztrnv_();
203  F_INTG_FCT zrot_();
204  F_INTG_FCT ilcm_();
205 /* ..
206 * .. Executable Statements ..
207 *
208 * Get grid parameters
209 */
210  ictxt = desc_X[CTXT_];
211  blacs_gridinfo_( &ictxt, &nprow, &npcol, &myrow, &mycol );
212 /*
213 * Test the input parameters
214 */
215  info = 0;
216  if( nprow == -1 )
217  info = -(500+CTXT_+1);
218  else
219  {
220  pbchkvect( *n, 1, *ix, *jx, desc_X, *incx, 5, &iix, &jjx,
221  &ixrow, &ixcol, nprow, npcol, myrow, mycol, &info );
222  pbchkvect( *n, 1, *iy, *jy, desc_Y, *incy, 10, &iiy, &jjy,
223  &iyrow, &iycol, nprow, npcol, myrow, mycol, &info );
224 
225  if( info == 0 )
226  {
227  if( *n != 1 )
228  {
229  if( *incx == desc_X[M_] )
230  { /* X is distributed along a process row */
231  if( *incy == desc_Y[M_] )
232  { /* Y is distributed over a process row */
233  if( ( ixcol != iycol ) ||
234  ( ( (*jx-1) % desc_X[NB_] ) !=
235  ( (*jy-1) % desc_Y[NB_] ) ) )
236  info = -9;
237  else if( desc_Y[NB_] != desc_X[NB_] )
238  info = -(1000+NB_+1);
239  }
240  else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
241  { /* Y is distributed over a process column */
242  if( ( (*jx-1) % desc_X[NB_] ) != ( (*iy-1) % desc_Y[MB_] ) )
243  info = -8;
244  else if( desc_Y[MB_] != desc_X[NB_] )
245  info = -(1000+MB_+1);
246  }
247  else
248  {
249  info = -11;
250  }
251  }
252  else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
253  { /* X is distributed along a process column */
254  if( *incy == desc_Y[M_] )
255  { /* Y is distributed over a process row */
256  if( ( (*ix-1) % desc_X[MB_] ) != ( (*jy-1) % desc_Y[NB_] ) )
257  info = -9;
258  else if( desc_Y[NB_] != desc_X[MB_] )
259  info = -(1000+NB_+1);
260  }
261  else if( ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
262  { /* Y is distributed over a process column */
263  if( ( ixrow != iyrow ) ||
264  ( ( (*ix-1) % desc_X[MB_] ) !=
265  ( (*iy-1) % desc_Y[MB_] ) ) )
266  info = -8;
267  else if( desc_Y[MB_] != desc_X[MB_] )
268  info = -(1000+MB_+1);
269  }
270  else
271  {
272  info = -11;
273  }
274  }
275  else
276  {
277  info = -6;
278  }
279  }
280  if( ictxt != desc_Y[CTXT_] )
281  info = -(1000+CTXT_+1);
282  }
283  }
284  if( info ) { PB_Cabort( ictxt, "PZROT", info ); return; }
285 /*
286  if( info )
287  {
288  pberror_( &ictxt, "PZROT", &info );
289  return;
290  }
291 */
292 
293 /*
294 * Quick return if possible.
295 */
296  zero.re = ZERO;
297  zero.im = ZERO;
298  if( *n == 0 ) return;
299 /*
300 * rotation
301 */
302  if( *n == 1 )
303  {
304  if( ( myrow == ixrow ) && ( mycol == ixcol ) )
305  {
306  buff = &X[iix-1+(jjx-1)*desc_X[LLD_]];
307  if( ( myrow != iyrow ) || ( mycol != iycol ) )
308  {
309  zgesd2d_( &ictxt, n, n, buff, n, &iyrow, &iycol );
310  zgerv2d_( &ictxt, n, n, ywork, n, &iyrow, &iycol );
311  }
312  else
313  *ywork = Y[iiy-1+(jjy-1)*desc_Y[LLD_]];
314  zrot_( n, buff, n, ywork, n, c, s );
315  X[iix-1+(jjx-1)*desc_X[LLD_]] = *buff;
316  if( ( myrow == iyrow ) && ( mycol == iycol ) )
317  Y[iiy-1+(jjy-1)*desc_Y[LLD_]] = *ywork;
318  }
319  else if( ( myrow == iyrow ) && ( mycol == iycol ) )
320  {
321  zgesd2d_( &ictxt, n, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n,
322  &ixrow, &ixcol );
323  zgerv2d_( &ictxt, n, n, xwork, n, &ixrow, &ixcol );
324  zrot_( n, xwork, n, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], n, c, s );
325  }
326  return;
327  }
328 
329  if( ( *incx == desc_X[M_] ) && ( *incy == desc_Y[M_] ) )
330  { /* X and Y are both distributed over a process row */
331  nz = (*jx-1) % desc_Y[NB_];
332  nn = *n + nz;
333  nq = numroc_( &nn, &desc_X[NB_], &mycol, &ixcol, &npcol );
334  if( mycol == ixcol )
335  nq -= nz;
336  if( ixrow == iyrow )
337  {
338  if( myrow == ixrow )
339  {
340  zrot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
341  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
342  }
343  }
344  else
345  {
346  if( myrow == ixrow )
347  {
348  zgesd2d_( &ictxt, &ione, &nq,
349  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
350  &iyrow, &mycol );
351  buff = (complex16 *)getpbbuf( "PZROT", nq*sizeof(complex16) );
352  zgerv2d_( &ictxt, &nq, &ione, buff, &nq, &iyrow, &mycol );
353  zrot_( &nq, &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
354  buff, &ione, c, s );
355  }
356  else if( myrow == iyrow )
357  {
358  zgesd2d_( &ictxt, &ione, &nq,
359  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
360  &ixrow, &mycol );
361  buff = (complex16 *)getpbbuf( "PZROT", nq*sizeof(complex16) );
362  zgerv2d_( &ictxt, &nq, &ione, buff, &nq, &ixrow, &mycol );
363  zrot_( &nq, buff, &ione,
364  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_], c, s );
365  }
366  }
367  }
368  else if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) &&
369  ( *incy == 1 ) && ( *incy != desc_Y[M_] ) )
370  { /* X and Y are both distributed over a process column */
371  nz = (*ix-1) % desc_X[MB_];
372  nn = *n + nz;
373  np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
374  if( myrow == ixrow )
375  np -= nz;
376  if( ixcol == iycol )
377  {
378  if( mycol == ixcol )
379  {
380  zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
381  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
382  }
383  }
384  else
385  {
386  if( mycol == ixcol )
387  {
388  zgesd2d_( &ictxt, &np, &ione,
389  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
390  &myrow, &iycol );
391  buff = (complex16 *)getpbbuf( "PZROT", np*sizeof(complex16) );
392  zgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &iycol );
393  zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]], incx,
394  buff, &ione, c, s );
395  }
396  else if( mycol == iycol )
397  {
398  zgesd2d_( &ictxt, &np, &ione,
399  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
400  &myrow, &ixcol );
401  buff = (complex16 *)getpbbuf( "PZROT", np*sizeof(complex16) );
402  zgerv2d_( &ictxt, &np, &ione, buff, &np, &myrow, &ixcol );
403  zrot_( &np, buff, &ione,
404  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
405  }
406  }
407  }
408  else /* X and Y are not distributed along the same direction */
409  {
410  lcm = ilcm_( &nprow, &npcol );
411  if( ( *incx == 1 ) && ( *incx != desc_X[M_] ) )
412  { /* X is distributed over a process column */
413  lcmp = lcm / nprow;
414  nz = (*jy-1) % desc_Y[NB_];
415  nn = *n + nz;
416  tmp1 = nn / desc_Y[MB_];
417  np = numroc_( &nn, &desc_X[MB_], &myrow, &ixrow, &nprow );
418  np0 = MYROC0( tmp1, nn, desc_X[MB_], nprow );
419  tmp1 = np0 / desc_X[MB_];
420  wksz = MYROC0( tmp1, np0, desc_X[MB_], lcmp );
421  wksz = np + wksz;
422 
423  buff = (complex16 *)getpbbuf( "PZROT", wksz*sizeof(complex16) );
424 
425  if( mycol == iycol )
426  jjy -= nz;
427  if( myrow == ixrow )
428  np -= nz;
429  pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
430  &desc_Y[NB_], &nz, &Y[iiy-1+(jjy-1)*desc_Y[LLD_]],
431  &desc_Y[LLD_], &zero, buff, &ione, &iyrow, &iycol,
432  &ixrow, &ixcol, buff+np );
433  if( mycol == ixcol )
434  {
435  zrot_( &np, &X[iix-1+(jjx-1)*desc_X[LLD_]],
436  incx, buff, &ione, c, s );
437  }
438  pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
439  &desc_Y[NB_], &nz, buff, &ione, &zero,
440  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], &desc_Y[LLD_],
441  &ixrow, &ixcol, &iyrow, &iycol, buff+np );
442  }
443  else /* Y is distributed over a process column */
444  {
445  lcmp = lcm / nprow;
446  nz = (*jx-1) % desc_X[NB_];
447  nn = *n + nz;
448  tmp1 = nn / desc_X[MB_];
449  np = numroc_( &nn, desc_Y+MB_, &myrow, &iyrow, &nprow );
450  np0 = MYROC0( tmp1, nn, desc_Y[MB_], nprow );
451  tmp1 = np0 / desc_Y[MB_];
452  wksz = MYROC0( tmp1, np0, desc_Y[MB_], lcmp );
453  wksz = np + wksz;
454 
455  buff = (complex16 *)getpbbuf( "PZROT", wksz*sizeof(complex16) );
456 
457  if( myrow == iyrow )
458  np -= nz;
459  pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
460  &desc_X[NB_], &nz, &X[iix-1+(jjx-1)*desc_X[LLD_]],
461  &desc_X[LLD_], &zero, buff, &ione, &ixrow, &ixcol,
462  &iyrow, &iycol, buff+np );
463  if( mycol == iycol )
464  {
465  zrot_( &np, buff, &ione,
466  &Y[iiy-1+(jjy-1)*desc_Y[LLD_]], incy, c, s );
467  }
468  pbztrnv_( &ictxt, C2F_CHAR( "R" ), C2F_CHAR( "T" ), n,
469  &desc_X[NB_], &nz, buff, &ione, &zero,
470  &X[iix-1+(jjx-1)*desc_X[LLD_]], &desc_X[LLD_],
471  &iyrow, &iycol, &ixrow, &ixcol, buff+np );
472  }
473  }
474 }
M_
#define M_
Definition: PBtools.h:39
getpbbuf
char * getpbbuf(char *mess, int length)
Definition: getpbbuf.c:3
MB_
#define MB_
Definition: PBtools.h:43
zgerv2d_
F_VOID_FUNC zgerv2d_(int *ConTxt, int *m, int *n, double *A, int *lda, int *rsrc, int *csrc)
Definition: zgerv2d_.c:6
complex16
Definition: pblas.h:93
NB_
#define NB_
Definition: PBtools.h:44
zgesd2d_
F_VOID_FUNC zgesd2d_(int *ConTxt, int *m, int *n, double *A, int *lda, int *rdest, int *cdest)
Definition: zgesd2d_.c:7
complex16::re
double re
Definition: pblas.h:93
LLD_
#define LLD_
Definition: PBtools.h:47
ZERO
#define ZERO
Definition: PBtools.h:66
blacs_gridinfo_
F_VOID_FUNC blacs_gridinfo_(int *ConTxt, int *nprow, int *npcol, int *myrow, int *mycol)
Definition: blacs_info_.c:6
complex16::im
double im
Definition: pblas.h:93
pbchkvect
void pbchkvect(int n, int npos0, int ix, int jx, desc_X, int incx, int dpos0, int *iix, int *jjx, int *ixrow, int *ixcol, int nprow, int npcol, int myrow, int mycol, int *info)
Definition: pbchkvect.c:15
PB_Cabort
void PB_Cabort()
pblas.h
F_INTG_FCT
#define F_INTG_FCT
Definition: pblas.h:124
pzrot_
void pzrot_(int *n, X, int *ix, int *jx, desc_X, int *incx, Y, int *iy, int *jy, desc_Y, int *incy, double *c, complex16 *s)
Definition: pzrot.c:13
C2F_CHAR
#define C2F_CHAR(a)
Definition: pblas.h:121
MYROC0
#define MYROC0(nblocks, n, nb, nprocs)
Definition: pblas.h:191
CTXT_
#define CTXT_
Definition: PBtools.h:38