ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdtrmr2.c
Go to the documentation of this file.
1 #include "redist.h"
2 /* $Id: pdtrmr2.c,v 1.1.1.1 2000/02/15 18:04:09 susan Exp $
3  *
4  * some functions used by the pdtrmr2d routine see file pdtrmr.c for more
5  * documentation.
6  *
7  * Created March 1993 by B. Tourancheau (See sccs for modifications). */
8 #define static2 static
9 #if defined(Add_) || defined(f77IsF2C)
10 #define fortran_mr2d pdtrmr2do_
11 #define fortran_mr2dnew pdtrmr2d_
12 #elif defined(UpCase)
13 #define fortran_mr2dnew PDTRMR2D
14 #define fortran_mr2d PDTRMR2DO
15 #define dcopy_ DCOPY
16 #define dlacpy_ DLACPY
17 #else
18 #define fortran_mr2d pdtrmr2do
19 #define fortran_mr2dnew pdtrmr2d
20 #define dcopy_ dcopy
21 #define dlacpy_ dlacpy
22 #endif
23 #define Clacpy Cdtrlacpy
24 void Clacpy();
25 typedef struct {
26  int desctype;
27  int ctxt;
28  int m;
29  int n;
30  int nbrow;
31  int nbcol;
32  int sprow;
33  int spcol;
34  int lda;
35 } MDESC;
36 #define BLOCK_CYCLIC_2D 1
37 typedef struct {
38  int gstart;
39  int len;
40 } IDESC;
41 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
42 #define max(A,B) ((A)>(B)?(A):(B))
43 #define min(A,B) ((A)>(B)?(B):(A))
44 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
45 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
46 #ifdef MALLOCDEBUG
47 #define malloc mymalloc
48 #define free myfree
49 #define realloc myrealloc
50 #endif
51 /* Cblacs */
52 extern void Cblacs_pcoord();
53 extern int Cblacs_pnum();
54 extern void Csetpvmtids();
55 extern void Cblacs_get();
56 extern void Cblacs_pinfo();
57 extern void Cblacs_gridinfo();
58 extern void Cblacs_gridinit();
59 extern void Cblacs_exit();
60 extern void Cblacs_gridexit();
61 extern void Cblacs_setup();
62 extern void Cigebs2d();
63 extern void Cigebr2d();
64 extern void Cigesd2d();
65 extern void Cigerv2d();
66 extern void Cigsum2d();
67 extern void Cigamn2d();
68 extern void Cigamx2d();
69 extern void Cdgesd2d();
70 extern void Cdgerv2d();
71 /* lapack */
72 void dlacpy_();
73 /* aux fonctions */
74 extern int localindice();
75 extern void *mr2d_malloc();
76 extern int ppcm();
77 extern int localsize();
78 extern int memoryblocksize();
79 extern int changeorigin();
80 extern void paramcheck();
81 /* tools and others function */
82 #define scanD0 dtrscanD0
83 #define dispmat dtrdispmat
84 #define setmemory dtrsetmemory
85 #define freememory dtrfreememory
86 #define scan_intervals dtrscan_intervals
87 extern void scanD0();
88 extern void dispmat();
89 extern void setmemory();
90 extern void freememory();
91 extern int scan_intervals();
92 extern void Cpdtrmr2do();
93 extern void Cpdtrmr2d();
94 /* some defines for Cpdtrmr2do */
95 #define SENDBUFF 0
96 #define RECVBUFF 1
97 #define SIZEBUFF 2
98 #if 0
99 #define DEBUG
100 #endif
101 #ifndef DEBUG
102 #define NDEBUG
103 #endif
104 #include <stdio.h>
105 #include <stdlib.h>
106 #include <string.h>
107 #include <assert.h>
108 #include <ctype.h>
109 /* Created March 1993 by B. Tourancheau (See sccs for modifications). */
110 /************************************************************************/
111 /* Set the memory space with the malloc function */
112 void
113 setmemory(adpointer, blocksize)
114  double **adpointer;
115  int blocksize;
116 {
117  assert(blocksize >= 0);
118  if (blocksize == 0) {
119  *adpointer = NULL;
120  return;
121  }
122  *adpointer = (double *) mr2d_malloc(
123  blocksize * sizeof(double));
124 }
125 /******************************************************************/
126 /* Free the memory space after the malloc */
127 void
128 freememory(ptrtobefreed)
129  double *ptrtobefreed;
130 {
131  if (ptrtobefreed == NULL)
132  return;
133  free((char *) ptrtobefreed);
134 }
135 /* extern functions for intersect() extern dcopy_(); */
136 /**************************************************************/
137 /* return the number of elements int the column after i and the distance of
138  * the first one from i, i,j can be negative out of borns, the number of
139  * elements returned can be negative (means 0) */
140 static2 int
141 insidemat(uplo, diag, i, j, m, n, offset)
142  int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */
143  char *uplo, *diag;
144  int *offset;
145 {
146  /* tests outside mxn */
147  assert(j >= 0 && j < n);
148  assert(i >= 0);
149  if (toupper(*uplo) == 'U') {
150  int nbline; /* number of lines in the j_th column */
151  int virtualnbline; /* number of line if we were not limited by m */
152  *offset = 0;
153  virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N');
154  nbline = min(virtualnbline, m);
155  return nbline - i;
156  } else {
157  int firstline; /* first line in the j_th column */
158  int diagcol; /* column where the diag begin */
159  int virtualline; /* virtual first line if the matrix was extended with
160  * negative indices */
161  int off;
162  diagcol = max(n - m, 0);;
163  virtualline = j - diagcol + (toupper(*diag) == 'U');
164  firstline = max(0, virtualline);
165  off = max(firstline - i, 0);
166  *offset = off;
167  i += off;
168  return m - i;
169  }
170 }/* insidemat() */
171 /********************************************************************/
172 /* Execute an action on the local memories when an intersection occurs (the
173  * action can be the filling of the memory buffer, the count of the memory
174  * buffer size or the setting of the memory with the element received) */
175 static2 void
176 intersect(uplo, diag,
177  j, start, end,
178  action,
179  ptrsizebuff, pptrbuff, ptrblock,
180  m, n,
181  ma, ia, ja, templateheight0, templatewidth0,
182  mb, ib, jb, templateheight1, templatewidth1)
183  int action, *ptrsizebuff;
184  int j, start, end;
185  double **pptrbuff, *ptrblock;
186  int templateheight0, templatewidth0;
187  int templateheight1, templatewidth1;
188  MDESC *ma, *mb;
189  int ia, ja, ib, jb, m, n;
190  char *uplo, *diag;
191 /* Execute the action on the local memory for the current interval and
192  * increment pptrbuff and ptrsizebuff of the intervalsize */
193 /* Notice that if the interval is contigous in the virtual matrice, it is
194  * also contigous in the real one ! */
195 {
196  /* int un = 1; only when we use dcopy instead of memcpy */
197  double *ptrstart;
198  int offset, nbline;
199  int intervalsize;
200  assert(start < end);
201  assert(j >= 0 && j < n);
202  nbline =
203  insidemat(uplo, diag, start, j, m, n, &offset);
204  if (nbline <= 0)
205  return;
206  start += offset;
207  if (start >= end)
208  return;
209  intervalsize = min(end - start, nbline);
210  (*ptrsizebuff) += intervalsize;
211  switch (action) {
212  case SENDBUFF: /* fill buff with local elements to be sent */
213  ptrstart = ptrblock + localindice(start + ia, j + ja,
214  templateheight0, templatewidth0, ma);
215  memcpy((char *) (*pptrbuff), (char *) ptrstart,
216  intervalsize * sizeof(double));
217  /* dcopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff),
218  * &un); */
219  (*pptrbuff) += intervalsize;
220  break;
221  case RECVBUFF: /* fill local memory with the values received */
222  ptrstart = ptrblock + localindice(start + ib, j + jb,
223  templateheight1, templatewidth1, mb);
224  memcpy((char *) ptrstart, (char *) (*pptrbuff),
225  intervalsize * sizeof(double));
226  /* dcopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart),
227  * &un); */
228  (*pptrbuff) += intervalsize;
229  break;
230  case SIZEBUFF: /* computation of sizebuff */
231  break;
232  default:
233  printf("action is %d outside the scope of the case [0..2] !! \n ", action);
234  exit(0);
235  break;
236  }; /* switch (action) */
237 }/* intersect() */
238 /* scan_intervals: scans two distributions in one dimension, and compute the
239  * intersections on the local processor. result must be long enough to
240  * contains the result that are stocked in IDESC structure, the function
241  * returns the number of intersections found */
242 int
243 scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1,
244  result)
245  char type;
246  int ja, jb, n, q0, q1, col0, col1;
247  MDESC *ma, *mb;
248  IDESC *result;
249 {
250  int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1;
251  int l; /* local indice on the beginning of the interval */
252  assert(type == 'c' || type == 'r');
253  nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow);
254  nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow);
255  templatewidth0 = q0 * nbcol0;
256  templatewidth1 = q1 * nbcol1;
257  {
258  int sp0 = (type == 'c' ? ma->spcol : ma->sprow);
259  int sp1 = (type == 'c' ? mb->spcol : mb->sprow);
260  j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja;
261  j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb;
262  }
263  offset = 0;
264  l = 0;
265  /* a small check to verify that the submatrix begin inside the first block
266  * of the original matrix, this done by a sort of coordinate change at the
267  * beginning of the Cpdtrmr2d */
268  assert(j0 + nbcol0 > 0);
269  assert(j1 + nbcol1 > 0);
270  while ((j0 < n) && (j1 < n)) {
271  int end0, end1;
272  int start, end;
273  end0 = j0 + nbcol0;
274  end1 = j1 + nbcol1;
275  if (end0 <= j1) {
276  j0 += templatewidth0;
277  l += nbcol0;
278  continue;
279  }
280  if (end1 <= j0) {
281  j1 += templatewidth1;
282  continue;
283  }
284  /* compute the raw intersection */
285  start = max(j0, j1);
286  start = max(start, 0);
287  /* the start is correct now, update the corresponding fields */
288  result[offset].gstart = start;
289  end = min(end0, end1);
290  if (end0 == end) {
291  j0 += templatewidth0;
292  l += nbcol0;
293  }
294  if (end1 == end)
295  j1 += templatewidth1;
296  /* throw the limit if they go out of the matrix */
297  end = min(end, n);
298  assert(end > start);
299  /* it is a bit tricky to see why the length is always positive after all
300  * this min and max, first we have the property that every interval
301  * considered is at least partly into the submatrix, second we arrive
302  * here only if the raw intersection is non-void, if we remove a limit
303  * that means the corresponding frontier is in both intervals which
304  * proove the final interval is non-void, clear ?? */
305  result[offset].len = end - start;
306  offset += 1;
307  } /* while */
308  return offset;
309 }
310 /*********************************************************************/
311 /* Do the scanning of intervals and the requested action */
312 void
313 scanD0(uplo, diag, action, ptrbuff, ptrsizebuff,
314  m, n,
315  ma, ia, ja, p0, q0,
316  mb, ib, jb, p1, q1,
317  v_inter, vinter_nb,
318  h_inter, hinter_nb,
319  ptrblock)
320  int action, /* # of the action done on the intersected intervals */
321  *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be
322  * an output parameter in every cases) */
323  double *ptrbuff /* address of the communication ptrbuffer (a
324  * suffisant memory space is supposed to be allocated
325  before the call) */ , *ptrblock;
326  int p0, q0, p1, q1;
327  IDESC *v_inter, *h_inter;
328  int vinter_nb, hinter_nb;
329  int m, n;
330  int ia, ja, ib, jb;
331  MDESC *ma, *mb;
332  char *uplo, *diag;
333 {/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to
334  * the [a..a+au-1] interval of length au */
335  int templateheight1, templatewidth1;
336  int templateheight0, templatewidth0;
337  int h, v; /* for scanning the intervals */
338  /* initializations */
339  templateheight1 = p1 * mb->nbrow;
340  templateheight0 = p0 * ma->nbrow;
341  templatewidth1 = q1 * mb->nbcol;
342  templatewidth0 = q0 * ma->nbcol;
343  /* we now will deal will logical grids, that's to say we change our
344  * numbering of processors so that (0,0) begin on logical processor (0,0) */
345  /* in case we will not enter the while loop */
346  (*ptrsizebuff) = 0;
347  for (h = 0; h < hinter_nb; h++)
348  for (v = 0; v < vinter_nb; v++) {
349  int j;
350  for (j = 0; j < h_inter[h].len; j++)
351  intersect(uplo, diag, j + h_inter[h].gstart,
352  v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len,
353  action, ptrsizebuff, &ptrbuff, ptrblock, m, n,
354  ma, ia, ja, templateheight0, templatewidth0,
355  mb, ib, jb, templateheight1, templatewidth1);
356  }
357 }/* scanD0() */
SIZEBUFF
#define SIZEBUFF
Definition: pdtrmr2.c:97
Cdgesd2d
void Cdgesd2d()
changeorigin
int changeorigin()
memoryblocksize
int memoryblocksize()
scanD0
#define scanD0
Definition: pdtrmr2.c:82
Csetpvmtids
void Csetpvmtids()
Cblacs_gridexit
void Cblacs_gridexit()
Cblacs_gridinfo
void Cblacs_gridinfo()
localsize
int localsize()
Cpdtrmr2do
void Cpdtrmr2do()
dlacpy_
#define dlacpy_
Definition: pdtrmr2.c:21
paramcheck
void paramcheck()
ppcm
int ppcm()
max
#define max(A, B)
Definition: pdtrmr2.c:42
MDESC::sprow
int sprow
Definition: pcgemr.c:170
MDESC::nbcol
int nbcol
Definition: pcgemr.c:169
Cblacs_pnum
int Cblacs_pnum()
Clacpy
#define Clacpy
Definition: pdtrmr2.c:23
MDESC
Definition: pcgemr.c:163
Cdgerv2d
void Cdgerv2d()
Cigesd2d
void Cigesd2d()
Cblacs_get
void Cblacs_get()
Cigamx2d
void Cigamx2d()
localindice
int localindice()
Cigebs2d
void Cigebs2d()
Cblacs_pcoord
void Cblacs_pcoord()
IDESC
Definition: pcgemr.c:175
IDESC::gstart
int gstart
Definition: pctrmr.c:191
freememory
#define freememory
Definition: pdtrmr2.c:85
setmemory
#define setmemory
Definition: pdtrmr2.c:84
Cblacs_pinfo
void Cblacs_pinfo()
SENDBUFF
#define SENDBUFF
Definition: pdtrmr2.c:95
dispmat
#define dispmat
Definition: pdtrmr2.c:83
Cblacs_exit
void Cblacs_exit()
min
#define min(A, B)
Definition: pdtrmr2.c:43
SHIFT
#define SHIFT(row, sprow, nbrow)
Definition: pdtrmr2.c:41
MDESC::spcol
int spcol
Definition: pcgemr.c:171
redist.h
insidemat
static2 int insidemat(char *uplo, char *diag, int i, int j, int m, int n, int *offset)
Definition: pdtrmr2.c:141
Cigerv2d
void Cigerv2d()
scan_intervals
#define scan_intervals
Definition: pdtrmr2.c:86
Cpdtrmr2d
void Cpdtrmr2d()
Cigebr2d
void Cigebr2d()
IDESC::len
int len
Definition: pcgemr.c:177
Cigsum2d
void Cigsum2d()
mr2d_malloc
void * mr2d_malloc()
Cblacs_setup
void Cblacs_setup()
RECVBUFF
#define RECVBUFF
Definition: pdtrmr2.c:96
Cblacs_gridinit
void Cblacs_gridinit()
MDESC::nbrow
int nbrow
Definition: pcgemr.c:168
static2
#define static2
Definition: pdtrmr2.c:8
Cigamn2d
void Cigamn2d()
intersect
static2 void intersect(char *uplo, char *diag, int j, int start, int end, int action, int *ptrsizebuff, double **pptrbuff, double *ptrblock, int m, int n, MDESC *ma, int ia, int ja, int templateheight0, int templatewidth0, MDESC *mb, int ib, int jb, int templateheight1, int templatewidth1)
Definition: pdtrmr2.c:176