SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pctrmr2.c
Go to the documentation of this file.
1#include "redist.h"
2/* $Id: pctrmr2.c,v 1.1.1.1 2000/02/15 18:04:10 susan Exp $
3 *
4 * some functions used by the pctrmr2d routine see file pctrmr.c for more
5 * documentation.
6 *
7 * Created March 1993 by B. Tourancheau (See sccs for modifications). */
8#define static2 static
9#if defined(Add_) || defined(f77IsF2C)
10#define fortran_mr2d pctrmr2do_
11#define fortran_mr2dnew pctrmr2d_
12#elif defined(UpCase)
13#define fortran_mr2dnew PCTRMR2D
14#define fortran_mr2d PCTRMR2DO
15#define ccopy_ CCOPY
16#define clacpy_ CLACPY
17#else
18#define fortran_mr2d pctrmr2do
19#define fortran_mr2dnew pctrmr2d
20#define ccopy_ ccopy
21#define clacpy_ clacpy
22#endif
23#define Clacpy Cctrlacpy
24void Clacpy();
25typedef struct {
26 float r, i;
27} complex;
28typedef struct {
29 Int desctype;
30 Int ctxt;
31 Int m;
32 Int n;
33 Int nbrow;
34 Int nbcol;
35 Int sprow;
36 Int spcol;
37 Int lda;
38} MDESC;
39#define BLOCK_CYCLIC_2D 1
40typedef struct {
41 Int gstart;
42 Int len;
43} IDESC;
44#define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
45#define max(A,B) ((A)>(B)?(A):(B))
46#define min(A,B) ((A)>(B)?(B):(A))
47#define DIVUP(a,b) ( ((a)-1) /(b)+1)
48#define ROUNDUP(a,b) (DIVUP(a,b)*(b))
49#ifdef MALLOCDEBUG
50#define malloc mymalloc
51#define free myfree
52#define realloc myrealloc
53#endif
54/* Cblacs */
55extern void Cblacs_pcoord();
57extern void Csetpvmtids();
58extern void Cblacs_get();
59extern void Cblacs_pinfo();
60extern void Cblacs_gridinfo();
61extern void Cblacs_gridinit();
62extern void Cblacs_exit();
63extern void Cblacs_gridexit();
64extern void Cblacs_setup();
65extern void Cigebs2d();
66extern void Cigebr2d();
67extern void Cigesd2d();
68extern void Cigerv2d();
69extern void Cigsum2d();
70extern void Cigamn2d();
71extern void Cigamx2d();
72extern void Ccgesd2d();
73extern void Ccgerv2d();
74/* lapack */
75void clacpy_();
76/* aux fonctions */
78extern void *mr2d_malloc();
79extern Int ppcm();
80extern Int localsize();
83extern void paramcheck();
84/* tools and others function */
85#define scanD0 ctrscanD0
86#define dispmat ctrdispmat
87#define setmemory ctrsetmemory
88#define freememory ctrfreememory
89#define scan_intervals ctrscan_intervals
90extern void scanD0();
91extern void dispmat();
92extern void setmemory();
93extern void freememory();
94extern Int scan_intervals();
95extern void Cpctrmr2do();
96extern void Cpctrmr2d();
97/* some defines for Cpctrmr2do */
98#define SENDBUFF 0
99#define RECVBUFF 1
100#define SIZEBUFF 2
101#if 0
102#define DEBUG
103#endif
104#ifndef DEBUG
105#define NDEBUG
106#endif
107#include <stdio.h>
108#include <stdlib.h>
109#include <string.h>
110#include <assert.h>
111#include <ctype.h>
112/* Created March 1993 by B. Tourancheau (See sccs for modifications). */
113/************************************************************************/
114/* Set the memory space with the malloc function */
115void
116setmemory(complex **adpointer, Int blocksize)
117{
118 assert(blocksize >= 0);
119 if (blocksize == 0) {
120 *adpointer = NULL;
121 return;
122 }
123 *adpointer = (complex *) mr2d_malloc(
124 blocksize * sizeof(complex));
125}
126/******************************************************************/
127/* Free the memory space after the malloc */
128void
129freememory(complex *ptrtobefreed)
130{
131 if (ptrtobefreed == NULL)
132 return;
133 free((char *) ptrtobefreed);
134}
135/* extern functions for intersect() extern ccopy_(); */
136/**************************************************************/
137/* return the number of elements int the column after i and the distance of
138 * the first one from i, i,j can be negative out of borns, the number of
139 * elements returned can be negative (means 0) */
141insidemat(uplo, diag, i, j, m, n, offset)
142 Int m, n, i, j; /* coordonnees de depart, taille de la sous-matrice */
143 char *uplo, *diag;
144 Int *offset;
145{
146 /* tests outside mxn */
147 assert(j >= 0 && j < n);
148 assert(i >= 0);
149 if (toupper(*uplo) == 'U') {
150 Int nbline; /* number of lines in the j_th column */
151 Int virtualnbline; /* number of line if we were not limited by m */
152 *offset = 0;
153 virtualnbline = max(m - n, 0) + j + (toupper(*diag) == 'N');
154 nbline = min(virtualnbline, m);
155 return nbline - i;
156 } else {
157 Int firstline; /* first line in the j_th column */
158 Int diagcol; /* column where the diag begin */
159 Int virtualline; /* virtual first line if the matrix was extended with
160 * negative indices */
161 Int off;
162 diagcol = max(n - m, 0);;
163 virtualline = j - diagcol + (toupper(*diag) == 'U');
164 firstline = max(0, virtualline);
165 off = max(firstline - i, 0);
166 *offset = off;
167 i += off;
168 return m - i;
169 }
170}/* insidemat() */
171/********************************************************************/
172/* Execute an action on the local memories when an intersection occurs (the
173 * action can be the filling of the memory buffer, the count of the memory
174 * buffer size or the setting of the memory with the element received) */
175static2 void
176intersect(uplo, diag,
177 j, start, end,
178 action,
179 ptrsizebuff, pptrbuff, ptrblock,
180 m, n,
181 ma, ia, ja, templateheight0, templatewidth0,
182 mb, ib, jb, templateheight1, templatewidth1)
183 Int action, *ptrsizebuff;
184 Int j, start, end;
185 complex **pptrbuff, *ptrblock;
186 Int templateheight0, templatewidth0;
187 Int templateheight1, templatewidth1;
188 MDESC *ma, *mb;
189 Int ia, ja, ib, jb, m, n;
190 char *uplo, *diag;
191/* Execute the action on the local memory for the current interval and
192 * increment pptrbuff and ptrsizebuff of the intervalsize */
193/* Notice that if the interval is contigous in the virtual matrice, it is
194 * also contigous in the real one ! */
195{
196 /* Int un = 1; only when we use dcopy instead of memcpy */
197 complex *ptrstart;
198 Int offset, nbline;
199 Int intervalsize;
200 assert(start < end);
201 assert(j >= 0 && j < n);
202 nbline =
203 insidemat(uplo, diag, start, j, m, n, &offset);
204 if (nbline <= 0)
205 return;
206 start += offset;
207 if (start >= end)
208 return;
209 intervalsize = min(end - start, nbline);
210 (*ptrsizebuff) += intervalsize;
211 switch (action) {
212 case SENDBUFF: /* fill buff with local elements to be sent */
213 ptrstart = ptrblock + localindice(start + ia, j + ja,
214 templateheight0, templatewidth0, ma);
215 memcpy((char *) (*pptrbuff), (char *) ptrstart,
216 intervalsize * sizeof(complex));
217 /* ccopy_(&intervalsize, (char *) (ptrstart), &un, (char *) (*pptrbuff),
218 * &un); */
219 (*pptrbuff) += intervalsize;
220 break;
221 case RECVBUFF: /* fill local memory with the values received */
222 ptrstart = ptrblock + localindice(start + ib, j + jb,
223 templateheight1, templatewidth1, mb);
224 memcpy((char *) ptrstart, (char *) (*pptrbuff),
225 intervalsize * sizeof(complex));
226 /* ccopy_(&intervalsize, (char *) (*pptrbuff), &un, (char *) (ptrstart),
227 * &un); */
228 (*pptrbuff) += intervalsize;
229 break;
230 case SIZEBUFF: /* computation of sizebuff */
231 break;
232 default:
233 printf("action is %d outside the scope of the case [0..2] !! \n ", action);
234 exit(0);
235 break;
236 }; /* switch (action) */
237}/* intersect() */
238/* scan_intervals: scans two distributions in one dimension, and compute the
239 * intersections on the local processor. result must be long enough to
240 * contains the result that are stocked in IDESC structure, the function
241 * returns the number of intersections found */
242Int
243scan_intervals(type, ja, jb, n, ma, mb, q0, q1, col0, col1,
244 result)
245 char type;
246 Int ja, jb, n, q0, q1, col0, col1;
247 MDESC *ma, *mb;
248 IDESC *result;
249{
250 Int offset, j0, j1, templatewidth0, templatewidth1, nbcol0, nbcol1;
251 Int l; /* local indice on the beginning of the interval */
252 assert(type == 'c' || type == 'r');
253 nbcol0 = (type == 'c' ? ma->nbcol : ma->nbrow);
254 nbcol1 = (type == 'c' ? mb->nbcol : mb->nbrow);
255 templatewidth0 = q0 * nbcol0;
256 templatewidth1 = q1 * nbcol1;
257 {
258 Int sp0 = (type == 'c' ? ma->spcol : ma->sprow);
259 Int sp1 = (type == 'c' ? mb->spcol : mb->sprow);
260 j0 = SHIFT(col0, sp0, q0) * nbcol0 - ja;
261 j1 = SHIFT(col1, sp1, q1) * nbcol1 - jb;
262 }
263 offset = 0;
264 l = 0;
265 /* a small check to verify that the submatrix begin inside the first block
266 * of the original matrix, this done by a sort of coordinate change at the
267 * beginning of the Cpctrmr2d */
268 assert(j0 + nbcol0 > 0);
269 assert(j1 + nbcol1 > 0);
270 while ((j0 < n) && (j1 < n)) {
271 Int end0, end1;
272 Int start, end;
273 end0 = j0 + nbcol0;
274 end1 = j1 + nbcol1;
275 if (end0 <= j1) {
276 j0 += templatewidth0;
277 l += nbcol0;
278 continue;
279 }
280 if (end1 <= j0) {
281 j1 += templatewidth1;
282 continue;
283 }
284 /* compute the raw intersection */
285 start = max(j0, j1);
286 start = max(start, 0);
287 /* the start is correct now, update the corresponding fields */
288 result[offset].gstart = start;
289 end = min(end0, end1);
290 if (end0 == end) {
291 j0 += templatewidth0;
292 l += nbcol0;
293 }
294 if (end1 == end)
295 j1 += templatewidth1;
296 /* throw the limit if they go out of the matrix */
297 end = min(end, n);
298 assert(end > start);
299 /* it is a bit tricky to see why the length is always positive after all
300 * this min and max, first we have the property that every interval
301 * considered is at least partly into the submatrix, second we arrive
302 * here only if the raw intersection is non-void, if we remove a limit
303 * that means the corresponding frontier is in both intervals which
304 * proove the final interval is non-void, clear ?? */
305 result[offset].len = end - start;
306 offset += 1;
307 } /* while */
308 return offset;
309}
310/*********************************************************************/
311/* Do the scanning of intervals and the requested action */
312void
313scanD0(uplo, diag, action, ptrbuff, ptrsizebuff,
314 m, n,
315 ma, ia, ja, p0, q0,
316 mb, ib, jb, p1, q1,
317 v_inter, vinter_nb,
318 h_inter, hinter_nb,
319 ptrblock)
320 Int action, /* # of the action done on the intersected intervals */
321 *ptrsizebuff; /* size of the communication ptrbuffer (chosen to be
322 * an output parameter in every cases) */
323 complex *ptrbuff /* address of the communication ptrbuffer (a
324 * suffisant memory space is supposed to be allocated
325 before the call) */ , *ptrblock;
326 Int p0, q0, p1, q1;
327 IDESC *v_inter, *h_inter;
328 Int vinter_nb, hinter_nb;
329 Int m, n;
330 Int ia, ja, ib, jb;
331 MDESC *ma, *mb;
332 char *uplo, *diag;
333{/* Rmk: the a+au type addresses are strict bounds as a+au does not belong to
334 * the [a..a+au-1] interval of length au */
335 Int templateheight1, templatewidth1;
336 Int templateheight0, templatewidth0;
337 Int h, v; /* for scanning the intervals */
338 /* initializations */
339 templateheight1 = p1 * mb->nbrow;
340 templateheight0 = p0 * ma->nbrow;
341 templatewidth1 = q1 * mb->nbcol;
342 templatewidth0 = q0 * ma->nbcol;
343 /* we now will deal will logical grids, that's to say we change our
344 * numbering of processors so that (0,0) begin on logical processor (0,0) */
345 /* in case we will not enter the while loop */
346 (*ptrsizebuff) = 0;
347 for (h = 0; h < hinter_nb; h++)
348 for (v = 0; v < vinter_nb; v++) {
349 Int j;
350 for (j = 0; j < h_inter[h].len; j++)
351 intersect(uplo, diag, j + h_inter[h].gstart,
352 v_inter[v].gstart, v_inter[v].gstart + v_inter[v].len,
353 action, ptrsizebuff, &ptrbuff, ptrblock, m, n,
354 ma, ia, ja, templateheight0, templatewidth0,
355 mb, ib, jb, templateheight1, templatewidth1);
356 }
357}/* scanD0() */
#define Int
Definition Bconfig.h:22
Int memoryblocksize()
Int changeorigin()
#define freememory
Definition pctrmr2.c:88
void Ccgesd2d()
#define scan_intervals
Definition pctrmr2.c:89
#define SHIFT(row, sprow, nbrow)
Definition pctrmr2.c:44
void Cblacs_gridexit()
#define max(A, B)
Definition pctrmr2.c:45
#define min(A, B)
Definition pctrmr2.c:46
void Cigsum2d()
#define static2
Definition pctrmr2.c:8
#define scanD0
Definition pctrmr2.c:85
Int Cblacs_pnum()
void Cpctrmr2d()
static2 Int insidemat(char *uplo, char *diag, Int i, Int j, Int m, Int n, Int *offset)
Definition pctrmr2.c:141
#define SIZEBUFF
Definition pctrmr2.c:100
#define Clacpy
Definition pctrmr2.c:23
void Cigamx2d()
static2 void intersect(char *uplo, char *diag, Int j, Int start, Int end, Int action, Int *ptrsizebuff, complex **pptrbuff, complex *ptrblock, Int m, Int n, MDESC *ma, Int ia, Int ja, Int templateheight0, Int templatewidth0, MDESC *mb, Int ib, Int jb, Int templateheight1, Int templatewidth1)
Definition pctrmr2.c:176
void Cblacs_pinfo()
void Cpctrmr2do()
void Cblacs_pcoord()
void Cigamn2d()
Int localsize()
#define setmemory
Definition pctrmr2.c:87
void Cblacs_get()
void Ccgerv2d()
void Cigerv2d()
void Cigebs2d()
#define dispmat
Definition pctrmr2.c:86
#define SENDBUFF
Definition pctrmr2.c:98
void paramcheck()
void Cblacs_setup()
#define RECVBUFF
Definition pctrmr2.c:99
void Cblacs_gridinit()
void Cblacs_gridinfo()
void Csetpvmtids()
Int localindice()
void Cigesd2d()
Int ppcm()
void Cigebr2d()
#define clacpy_
Definition pctrmr2.c:21
void Cblacs_exit()
void * mr2d_malloc()
Int len
Definition pcgemr.c:177
Int gstart
Definition pctrmr.c:191
Int spcol
Definition pcgemr.c:171
Int nbcol
Definition pcgemr.c:169
Int sprow
Definition pcgemr.c:170
Int nbrow
Definition pcgemr.c:168