ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcgemrdrv.c
Go to the documentation of this file.
1 #include "redist.h"
2 /* $Id: pcgemrdrv.c,v 1.1.1.1 2000/02/15 18:04:11 susan Exp $
3  *
4  * pcgemrdrv.c :
5  *
6  *
7  * PURPOSE:
8  *
9  * this driver is testing the PCGEMR2D routine. It calls it to obtain a new
10  * scattered block data decomposition of a distributed COMPLEX (block
11  * scattered) matrix. Then it calls PCGEMR2D for the inverse redistribution
12  * and checks the results with the initial data.
13  *
14  * Data are going from a Block Scattered nbrow0 x nbcol0 decomposition on the
15  * processor grid p0 x q0, to data distributed in a BS nbrow1 x nbcol1 on the
16  * processor grid p1 x q1, then back to the BS nbrow0 x nbcol0 decomposition
17  * on the processor grid p0 x q0.
18  *
19  * See pcgemr.c file for detailed info on the PCGEMR2D function.
20  *
21  *
22  * The testing parameters are read from the file GEMR2D.dat; see the copy in the
23  * distribution for an example.
24  *
25  * created by Bernard Tourancheau in April 1994.
26  *
27  * modifications : see sccs history
28  *
29  * ===================================
30  *
31  *
32  * NOTE :
33  *
34  * - the matrix elements are COMPLEX
35  *
36  * - memory requirements: this procedure requires approximately 3 times the
37  * memory space of the initial data block in grid 0 (initial block, copy for
38  * test and second redistribution result) and 1 time the memory space of the
39  * result data block in grid 1, with an element size of sizeof(complex)
40  * bytes.
41  *
42  *
43  * - use the procedures of the files:
44  *
45  * pcgemr.o pcgemr2.o pcgemraux.o
46  *
47  *
48  * ======================================
49  *
50  * WARNING ASSUMPTIONS :
51  *
52  *
53  * ========================================
54  *
55  *
56  * Planned changes:
57  *
58  *
59  *
60  * ========================================= */
/* static2 expands to plain `static`; it is used to mark file-local helpers
 * such as initblock(). */
61 #define static2 static
/* Choose the spelling of the Fortran-callable entry points according to the
 * naming convention selected at build time: trailing underscore (Add_ or
 * f77IsF2C), upper case (UpCase), or plain lower case (default).  In the
 * last two cases the ccopy_/clacpy_ names are remapped to match as well. */
62 #if defined(Add_) || defined(f77IsF2C)
63 #define fortran_mr2d pcgemr2do_
64 #define fortran_mr2dnew pcgemr2d_
65 #elif defined(UpCase)
66 #define fortran_mr2dnew PCGEMR2D
67 #define fortran_mr2d PCGEMR2DO
68 #define ccopy_ CCOPY
69 #define clacpy_ CLACPY
70 #else
71 #define fortran_mr2d pcgemr2do
72 #define fortran_mr2dnew pcgemr2d
73 #define ccopy_ ccopy
74 #define clacpy_ clacpy
75 #endif
/* Clacpy is an alias for the complex-specific symbol Ccgelacpy; it is
 * declared old-style, i.e. without a parameter list. */
76 #define Clacpy Ccgelacpy
77 void Clacpy();
/* Single-precision complex matrix element.  This driver only writes and
 * compares the `r` member (presumably r = real part, i = imaginary part). */
78 typedef struct {
79  float r, i;
80 } complex;
/* Descriptor of a distributed matrix; main() fills one per distribution and
 * passes its address to Cpcgemr2d(). */
81 typedef struct {
82  int desctype; /* descriptor kind; presumably expected to be BLOCK_CYCLIC_2D -- TODO confirm */
83  int ctxt; /* BLACS context of the grid holding the matrix */
84  int m; /* number of rows of the global matrix */
85  int n; /* number of columns of the global matrix */
86  int nbrow; /* row block size of the decomposition */
87  int nbcol; /* column block size of the decomposition */
88  int sprow; /* process row used as origin by SHIFT(); presumably owns the first block row */
89  int spcol; /* process column used as origin by SHIFT(); presumably owns the first block column */
90  int lda; /* leading dimension of the local array (main() obtains it from localsize()) */
91 } MDESC;
/* Value for MDESC.desctype (see the hedge on that field). */
92 #define BLOCK_CYCLIC_2D 1
/* Start/length pair; not used by the code visible in this file -- presumably
 * the interval type handled by scan_intervals() (TODO confirm). */
93 typedef struct {
94  int lstart;
95  int len;
96 } IDESC;
/* SHIFT: offset of `row` from `sprow`, made non-negative by adding `nbrow`
 * when row < sprow.  main() passes a process-grid dimension as the third
 * argument. */
97 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
/* max / min: classic two-argument macros; an argument may be evaluated twice,
 * so do not pass expressions with side effects. */
98 #define max(A,B) ((A)>(B)?(A):(B))
99 #define min(A,B) ((A)>(B)?(B):(A))
/* DIVUP: a / b rounded up (valid for a >= 1 and b > 0).
 * ROUNDUP: a rounded up to the next multiple of b. */
100 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
101 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
/* With MALLOCDEBUG defined, the allocator calls in this file are redirected
 * to the my* replacements. */
102 #ifdef MALLOCDEBUG
103 #define malloc mymalloc
104 #define free myfree
105 #define realloc myrealloc
106 #endif
107 /* Cblacs: C interface of the BLACS.  All declarations below are old-style
 * (empty parameter lists), so argument types are not checked at call sites. */
108 extern void Cblacs_pcoord();
109 extern int Cblacs_pnum();
110 extern void Csetpvmtids();
111 extern void Cblacs_get();
112 extern void Cblacs_pinfo();
113 extern void Cblacs_gridinfo();
114 extern void Cblacs_gridinit();
115 extern void Cblacs_exit();
116 extern void Cblacs_gridexit();
117 extern void Cblacs_setup();
118 extern void Cigebs2d();
119 extern void Cigebr2d();
120 extern void Cigesd2d();
121 extern void Cigerv2d();
122 extern void Cigsum2d();
123 extern void Cigamn2d();
124 extern void Cigamx2d();
125 extern void Ccgesd2d();
126 extern void Ccgerv2d();
127 /* LAPACK */
128 void clacpy_();
129 /* auxiliary functions */
130 extern int localindice();
131 extern void *mr2d_malloc();
132 extern int ppcm();
133 extern int localsize();
134 extern int memoryblocksize();
135 extern int changeorigin();
136 extern void paramcheck();
137 /* tools and other functions: the generic names are mapped onto their
 * complex-specific (cge-prefixed) symbols before being declared */
138 #define scanD0 cgescanD0
139 #define dispmat cgedispmat
140 #define setmemory cgesetmemory
141 #define freememory cgefreememory
142 #define scan_intervals cgescan_intervals
143 extern void scanD0();
144 extern void dispmat();
145 extern void setmemory();
146 extern void freememory();
147 extern int scan_intervals();
148 extern void Cpcgemr2do();
149 extern void Cpcgemr2d();
150 /* some defines for Cpcgemr2do (not referenced by the code in this file) */
151 #define SENDBUFF 0
152 #define RECVBUFF 1
153 #define SIZEBUFF 2
/* DEBUG is compiled out by the `#if 0` below; change it to `#if 1` to enable
 * the debug trace in main().  When DEBUG is not defined, NDEBUG is defined
 * ahead of the <assert.h> include, which turns every assert() in this file
 * into a no-op. */
154 #if 0
155 #define DEBUG
156 #endif
157 #ifndef DEBUG
158 #define NDEBUG
159 #endif
160 #include <stdio.h>
161 #include <stdlib.h>
162 #include <string.h>
163 #include <ctype.h>
164 #include <assert.h>
165 /* initblock: intialize the local part of a matrix with random data (well,
166  * not very random) */
167 static2 void
168 initblock(block, m, n)
169  complex *block;
170  int m, n;
171 {
172  complex *pdata;
173  int i;
174  pdata = block;
175  for (i = 0; i < m * n; i++, pdata++) {
176  (*pdata).r = i;
177  };
178 }
/* getparam: read the next non-comment line of the configuration file and
 * parse one base-10 integer from it for every `int *` passed after f.  The
 * pointer list must end with a NULL argument.  Lines whose first character
 * is '#' are skipped.  A read failure or a missing number prints a
 * diagnostic with the current line count on stderr and ends the program
 * with exit status 1. */
#ifdef __STDC__
#include <stdarg.h>
static void
getparam(FILE * f,...)
{
#else
#include <varargs.h>
static void
getparam(va_alist)
va_dcl
{
  FILE *f;
#endif
  static char line[200];
  static int lineno;            /* lines consumed so far, across all calls */
  va_list args;
  char *cursor, *stop;
  int *dest;

#ifdef __STDC__
  va_start(args, f);
#else
  va_start(args);
  f = va_arg(args, FILE *);
#endif
  /* Fetch lines until one does not start with '#'. */
  for (;;) {
    if (fgets(line, 200, f) == NULL) {
      fprintf(stderr, "bad configuration driver file:after line %d\n", lineno);
      exit(1);
    }
    lineno += 1;
    if (line[0] != '#')
      break;
  }
  /* One integer per destination, consumed from left to right. */
  cursor = line;
  for (dest = va_arg(args, int *); dest != NULL; dest = va_arg(args, int *)) {
    *dest = strtol(cursor, &stop, 10);
    if (stop == cursor) {       /* strtol consumed nothing: no number left */
      fprintf(stderr, "bad configuration driver file:error line %d\n", lineno);
      exit(1);
    }
    cursor = stop;
  }
  va_end(args);
}
/* initforpvm: start-up hook for PVM builds.
 *
 * Asks the BLACS for this process's number and the process count.  A count
 * below 1 is taken to mean "running under PVM": process 0 then reads the
 * number of nodes from argv[1] (printing a usage message and exiting with
 * status 1 when it is missing) and every process calls Cblacs_setup().
 * With a count of 1 or more the function does nothing else. */
void
initforpvm(argc, argv)
  int argc;
  char *argv[];
{
  int me, count;

  Cblacs_pinfo(&me, &count);
  if (count >= 1)
    return;
  /* we are with PVM */
  if (me == 0) {
    if (argc < 2) {
      fprintf(stderr, "usage with PVM:xcgemr nbproc\n\
\t where nbproc is the number of nodes to initialize\n");
      exit(1);
    }
    count = atoi(argv[1]);
  }
  Cblacs_setup(&me, &count);
}
246 int
247 main(argc, argv)
248  int argc;
249  char *argv[];
250 {
251  /* We initialize the data-block on the current processor, then redistribute
252  * it, and perform the inverse redistribution to compare the local memory
253  * with the initial one. */
254  /* Data file */
255  FILE *fp;
256  int nbre, nbremax;
257  /* Data distribution 0 parameters */
258  int p0, /* # of rows in the processor grid */
259  q0; /* # of columns in the processor grid */
260  /* Data distribution 1 parameters */
261  int p1, q1;
262  /* # of parameter to be read on the keyboard */
263 #define nbparameter 24
264  /* General variables */
265  int blocksize0;
266  int mypnum, nprocs;
267  int parameters[nbparameter], nberrors;
268  int i;
269  int ia, ja, ib, jb, m, n;
270  int gcontext, context0, context1;
271  int myprow1, myprow0, mypcol0, mypcol1;
272  int dummy;
273  MDESC ma, mb;
274  complex *ptrmyblock, *ptrsavemyblock, *ptrmyblockcopy, *ptrmyblockvide;
275 #ifdef UsingMpiBlacs
276  MPI_Init(&argc, &argv);
277 #endif
278  setvbuf(stdout, NULL, _IOLBF, 0);
279  setvbuf(stderr, NULL, _IOLBF, 0);
280 #ifdef T3D
281  free(malloc(14000000));
282 #endif
283  initforpvm(argc, argv);
284  /* Read physical parameters */
285  Cblacs_pinfo(&mypnum, &nprocs);
286  /* initialize BLACS for the parameter communication */
287  Cblacs_get(0, 0, &gcontext);
288  Cblacs_gridinit(&gcontext, "R", nprocs, 1);
289  Cblacs_gridinfo(gcontext, &dummy, &dummy, &mypnum, &dummy);
290  if (mypnum == 0) {
291  if ((fp = fopen("GEMR2D.dat", "r")) == NULL) {
292  fprintf(stderr, "Can't open GEMR2D.dat\n");
293  exit(1);
294  };
295  printf("\n// CGEMR2D TESTER for COMPLEX //\n");
296  getparam(fp, &nbre, NULL);
297  printf("////////// %d tests \n\n", nbre);
298  parameters[0] = nbre;
299  Cigebs2d(gcontext, "All", "H", 1, 1, parameters, 1);
300  } else {
301  Cigebr2d(gcontext, "All", "H", 1, 1, parameters, 1, 0, 0);
302  nbre = parameters[0];
303  };
304  if (mypnum == 0) {
305  printf("\n m n m0 n0 sr0 sc0 i0 j0 p0 q0 nbr0 nbc0 \
306 m1 n1 sr1 sc1 i1 j1 p1 q1 nbr1 nbc1\n\n");
307  };
308  /****** TEST LOOP *****/
309  /* Here we are in grip 1xnprocs */
310  nbremax = nbre;
311 #ifdef DEBUG
312  fprintf(stderr, "bonjour,je suis le noeud %d\n", mypnum);
313 #endif
314  while (nbre-- != 0) { /* Loop on the serie of tests */
315  /* All the processors read the parameters so we have to be in a 1xnprocs
316  * grid at each iteration */
317  /* Read processors grid and matrices parameters */
318  if (mypnum == 0) {
319  int u, d;
320  getparam(fp,
321  &m, &n,
322  &ma.m, &ma.n, &ma.sprow, &ma.spcol,
323  &ia, &ja, &p0, &q0, &ma.nbrow, &ma.nbcol,
324  &mb.m, &mb.n, &mb.sprow, &mb.spcol,
325  &ib, &jb, &p1, &q1, &mb.nbrow, &mb.nbcol,
326  NULL);
327  printf("\t\t************* TEST # %d **********\n",
328  nbremax - nbre);
329  printf(" %3d %3d %3d %3d %3d %3d %3d %3d \
330 %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d",
331  m, n,
332  ma.m, ma.n, ma.sprow, ma.spcol,
333  ia, ja, p0, q0, ma.nbrow, ma.nbcol,
334  mb.m, mb.n, mb.sprow, mb.spcol,
335  ib, jb, p1, q1, mb.nbrow, mb.nbcol);
336  printf("\n");
337  if (p0 * q0 > nprocs || p1 * q1 > nprocs) {
338  fprintf(stderr, "not enough nodes:%d processors required\n",
339  max(p0 * q0, p1 * q1));
340  exit(1);
341  }
342  parameters[0] = p0;
343  parameters[1] = q0;
344  parameters[2] = ma.nbrow;
345  parameters[3] = ma.nbcol;
346  parameters[4] = p1;
347  parameters[5] = q1;
348  parameters[6] = mb.nbrow;
349  parameters[7] = mb.nbcol;
350  parameters[8] = ma.m;
351  parameters[9] = ma.n;
352  parameters[10] = ma.sprow;
353  parameters[11] = ma.spcol;
354  parameters[12] = mb.sprow;
355  parameters[13] = mb.spcol;
356  parameters[14] = ia;
357  parameters[15] = ja;
358  parameters[16] = ib;
359  parameters[17] = jb;
360  parameters[18] = m;
361  parameters[19] = n;
362  parameters[20] = mb.m;
363  parameters[21] = mb.n;
364  Cigebs2d(gcontext, "All", "H", 1, nbparameter, parameters, 1);
365  } else {
366  Cigebr2d(gcontext, "All", "H", 1, nbparameter, parameters, 1, 0, 0);
367  p0 = parameters[0];
368  q0 = parameters[1];
369  ma.nbrow = parameters[2];
370  ma.nbcol = parameters[3];
371  p1 = parameters[4];
372  q1 = parameters[5];
373  mb.nbrow = parameters[6];
374  mb.nbcol = parameters[7];
375  ma.m = parameters[8];
376  ma.n = parameters[9];
377  ma.sprow = parameters[10];
378  ma.spcol = parameters[11];
379  mb.sprow = parameters[12];
380  mb.spcol = parameters[13];
381  ia = parameters[14];
382  ja = parameters[15];
383  ib = parameters[16];
384  jb = parameters[17];
385  m = parameters[18];
386  n = parameters[19];
387  mb.m = parameters[20];
388  mb.n = parameters[21];
391  };
392  Cblacs_get(0, 0, &context0);
393  Cblacs_gridinit(&context0, "R", p0, q0);
394  Cblacs_get(0, 0, &context1);
395  Cblacs_gridinit(&context1, "R", p1, q1);
396  Cblacs_gridinfo(context0, &dummy, &dummy, &myprow0, &mypcol0);
397  if (myprow0 >= p0 || mypcol0 >= q0)
398  myprow0 = mypcol0 = -1;
399  Cblacs_gridinfo(context1, &dummy, &dummy, &myprow1, &mypcol1);
400  if (myprow1 >= p1 || mypcol1 >= q1)
401  myprow1 = mypcol1 = -1;
402  assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
403  assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
404  ma.ctxt = context0;
405  mb.ctxt = context1;
406  /* From here, we are not assuming that only the processors working in the
407  * redistribution are calling xxMR2D, but the ones not concerned will do
408  * nothing. */
409  /* We compute the exact size of the local memory block for the memory
410  * allocations */
411  if (myprow0 >= 0 && mypcol0 >= 0) {
412  blocksize0 = memoryblocksize(&ma);
413  ma.lda = localsize(SHIFT(myprow0, ma.sprow, p0), p0, ma.nbrow, ma.m);
414  setmemory(&ptrmyblock, blocksize0);
415  initblock(ptrmyblock, 1, blocksize0);
416  setmemory(&ptrmyblockcopy, blocksize0);
417  memcpy((char *) ptrmyblockcopy, (char *) ptrmyblock,
418  blocksize0 * sizeof(complex));
419  setmemory(&ptrmyblockvide, blocksize0);
420  for (i = 0; i < blocksize0; i++)
421  ptrmyblockvide[i].r = -1;
422  }; /* if (mypnum < p0 * q0) */
423  if (myprow1 >= 0 && mypcol1 >= 0) {
424  setmemory(&ptrsavemyblock, memoryblocksize(&mb));
425  mb.lda = localsize(SHIFT(myprow1, mb.sprow, p1), p1, mb.nbrow, mb.m);
426  }; /* if (mypnum < p1 * q1) */
427  /* Redistribute the matrix from grid 0 to grid 1 (memory location
428  * ptrmyblock to ptrsavemyblock) */
429  Cpcgemr2d(m, n,
430  ptrmyblock, ia, ja, &ma,
431  ptrsavemyblock, ib, jb, &mb, gcontext);
432  /* Perform the inverse redistribution of the matrix from grid 1 to grid 0
433  * (memory location ptrsavemyblock to ptrmyblockvide) */
434  Cpcgemr2d(m, n,
435  ptrsavemyblock, ib, jb, &mb,
436  ptrmyblockvide, ia, ja, &ma, gcontext);
437  /* Check the differences */
438  nberrors = 0;
439  if (myprow0 >= 0 && mypcol0 >= 0) {
440  /* only for the processors that do have data at the begining */
441  for (i = 0; i < blocksize0; i++) {
442  int li, lj, gi, gj;
443  int in;
444  in = 1;
445  li = i % ma.lda;
446  lj = i / ma.lda;
447  gi = (li / ma.nbrow) * p0 * ma.nbrow +
448  SHIFT(myprow0, ma.sprow, p0) * ma.nbrow + li % ma.nbrow;
449  gj = (lj / ma.nbcol) * q0 * ma.nbcol +
450  SHIFT(mypcol0, ma.spcol, q0) * ma.nbcol + lj % ma.nbcol;
451  assert(gi < ma.m && gj < ma.n);
452  gi -= (ia - 1);
453  gj -= (ja - 1);
454  if (gi < 0 || gj < 0 || gi >= m || gj >= n)
455  in = 0;
456  if (!in) {
457  ptrmyblockcopy[i].r = -1;
458  }
459  if (ptrmyblockvide[i].r != ptrmyblockcopy[i].r) {
460  nberrors++;
461  printf("Proc %d : Error element number %d, value = %f , initvalue =%f \n"
462  ,mypnum, i,
463  ptrmyblockvide[i].r, ptrmyblockcopy[i].r);
464  };
465  };
466  if (nberrors > 0) {
467  printf("Processor %d, has tested %d COMPLEX elements,\
468 Number of redistribution errors = %d \n",
469  mypnum, blocksize0, nberrors);
470  }
471  }
472  /* Look at the errors on all the processors at this point. */
473  Cigsum2d(gcontext, "All", "H", 1, 1, &nberrors, 1, 0, 0);
474  if (mypnum == 0)
475  if (nberrors)
476  printf(" => Total number of redistribution errors = %d \n",
477  nberrors);
478  else
479  printf("TEST PASSED OK\n");
480  /* release memory for the next iteration */
481  if (myprow0 >= 0 && mypcol0 >= 0) {
482  freememory((char *) ptrmyblock);
483  freememory((char *) ptrmyblockvide);
484  freememory((char *) ptrmyblockcopy);
485  }; /* if (mypnum < p0 * q0) */
486  /* release memory for the next iteration */
487  if (myprow1 >= 0 && mypcol1 >= 0) {
488  freememory((char *) ptrsavemyblock);
489  };
490  if (myprow0 >= 0)
491  Cblacs_gridexit(context0);
492  if (myprow1 >= 0)
493  Cblacs_gridexit(context1);
494  }; /* while nbre != 0 */
495  if (mypnum == 0) {
496  fclose(fp);
497  };
498  Cblacs_exit(0);
499  return 0;
500 }/* main */
Cigamx2d
void Cigamx2d()
Cblacs_pcoord
void Cblacs_pcoord()
Cblacs_pinfo
void Cblacs_pinfo()
localsize
int localsize()
Cblacs_get
void Cblacs_get()
main
int main(int argc, argv)
Definition: pcgemrdrv.c:247
MDESC::ctxt
int ctxt
Definition: pcgemr.c:165
Cpcgemr2do
void Cpcgemr2do()
BLOCK_CYCLIC_2D
#define BLOCK_CYCLIC_2D
Definition: pcgemrdrv.c:92
complex::r
float r
Definition: pcgemr.c:161
dispmat
#define dispmat
Definition: pcgemrdrv.c:139
MDESC::sprow
int sprow
Definition: pcgemr.c:170
MDESC::n
int n
Definition: pcgemr.c:167
MDESC::nbcol
int nbcol
Definition: pcgemr.c:169
MDESC
Definition: pcgemr.c:163
Cpcgemr2d
void Cpcgemr2d()
Cblacs_gridinfo
void Cblacs_gridinfo()
paramcheck
void paramcheck()
mr2d_malloc
void * mr2d_malloc()
Cblacs_setup
void Cblacs_setup()
redist.h
MDESC::desctype
int desctype
Definition: pcgemr.c:164
setmemory
#define setmemory
Definition: pcgemrdrv.c:140
initforpvm
void initforpvm(int argc, argv)
Definition: pcgemrdrv.c:228
IDESC
Definition: pcgemr.c:175
initblock
static2 void initblock(complex *block, int m, int n)
Definition: pcgemrdrv.c:168
Ccgerv2d
void Ccgerv2d()
Cigamn2d
void Cigamn2d()
Cigerv2d
void Cigerv2d()
Cblacs_exit
void Cblacs_exit()
Cigsum2d
void Cigsum2d()
Csetpvmtids
void Csetpvmtids()
scanD0
#define scanD0
Definition: pcgemrdrv.c:138
MDESC::spcol
int spcol
Definition: pcgemr.c:171
Ccgesd2d
void Ccgesd2d()
Cblacs_pnum
int Cblacs_pnum()
max
#define max(A, B)
Definition: pcgemrdrv.c:98
Cigesd2d
void Cigesd2d()
scan_intervals
#define scan_intervals
Definition: pcgemrdrv.c:142
nbparameter
#define nbparameter
Cigebs2d
void Cigebs2d()
changeorigin
int changeorigin()
clacpy_
#define clacpy_
Definition: pcgemrdrv.c:74
freememory
#define freememory
Definition: pcgemrdrv.c:141
Clacpy
#define Clacpy
Definition: pcgemrdrv.c:76
SHIFT
#define SHIFT(row, sprow, nbrow)
Definition: pcgemrdrv.c:97
MDESC::lda
int lda
Definition: pcgemr.c:172
Cblacs_gridexit
void Cblacs_gridexit()
MDESC::nbrow
int nbrow
Definition: pcgemr.c:168
ppcm
int ppcm()
localindice
int localindice()
MDESC::m
int m
Definition: pcgemr.c:166
static2
#define static2
Definition: pcgemrdrv.c:61
memoryblocksize
int memoryblocksize()
complex
Definition: pcgemr.c:160
Cblacs_gridinit
void Cblacs_gridinit()
Cigebr2d
void Cigebr2d()