SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pctrmr.c
Go to the documentation of this file.
1#include "redist.h"
158#define static2 static
159#if defined(Add_) || defined(f77IsF2C)
160#define fortran_mr2d pctrmr2do_
161#define fortran_mr2dnew pctrmr2d_
162#elif defined(UpCase)
163#define fortran_mr2dnew PCTRMR2D
164#define fortran_mr2d PCTRMR2DO
165#define ccopy_ CCOPY
166#define clacpy_ CLACPY
167#else
168#define fortran_mr2d pctrmr2do
169#define fortran_mr2dnew pctrmr2d
170#define ccopy_ ccopy
171#define clacpy_ clacpy
172#endif
173#define Clacpy Cctrlacpy
174void Clacpy();
175typedef struct {
176 float r, i;
177} complex;
178typedef struct {
179 Int desctype;
180 Int ctxt;
181 Int m;
182 Int n;
183 Int nbrow;
184 Int nbcol;
185 Int sprow;
186 Int spcol;
187 Int lda;
188} MDESC;
189#define BLOCK_CYCLIC_2D 1
190typedef struct {
192 Int len;
193} IDESC;
194#define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
195#define max(A,B) ((A)>(B)?(A):(B))
196#define min(A,B) ((A)>(B)?(B):(A))
197#define DIVUP(a,b) ( ((a)-1) /(b)+1)
198#define ROUNDUP(a,b) (DIVUP(a,b)*(b))
199#ifdef MALLOCDEBUG
200#define malloc mymalloc
201#define free myfree
202#define realloc myrealloc
203#endif
204/* Cblacs */
205extern void Cblacs_pcoord();
207extern void Csetpvmtids();
208extern void Cblacs_get();
209extern void Cblacs_pinfo();
210extern void Cblacs_gridinfo();
211extern void Cblacs_gridinit();
212extern void Cblacs_exit();
213extern void Cblacs_gridexit();
214extern void Cblacs_setup();
215extern void Cigebs2d();
216extern void Cigebr2d();
217extern void Cigesd2d();
218extern void Cigerv2d();
219extern void Cigsum2d();
220extern void Cigamn2d();
221extern void Cigamx2d();
222extern void Ccgesd2d();
223extern void Ccgerv2d();
224/* lapack */
225void clacpy_();
226/* aux fonctions */
228extern void *mr2d_malloc();
229extern Int ppcm();
230extern Int localsize();
233extern void paramcheck();
234/* tools and others function */
235#define scanD0 ctrscanD0
236#define dispmat ctrdispmat
237#define setmemory ctrsetmemory
238#define freememory ctrfreememory
239#define scan_intervals ctrscan_intervals
240extern void scanD0();
241extern void dispmat();
242extern void setmemory();
243extern void freememory();
244extern Int scan_intervals();
245extern void Cpctrmr2do();
246extern void Cpctrmr2d();
247/* some defines for Cpctrmr2do */
248#define SENDBUFF 0
249#define RECVBUFF 1
250#define SIZEBUFF 2
251#if 0
252#define DEBUG
253#endif
254#ifndef DEBUG
255#define NDEBUG
256#endif
257#include <stdio.h>
258#include <stdlib.h>
259#include <assert.h>
260#define DESCLEN 9
261void
262fortran_mr2d(char *uplo, char *diag, Int *m, Int *n, complex *A, Int *ia, Int *ja, Int desc_A[DESCLEN],
263 complex *B, Int *ib, Int *jb, Int desc_B[DESCLEN])
264{
265 Cpctrmr2do(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A,
266 B, *ib, *jb, (MDESC *) desc_B);
267 return;
268}
269void
270fortran_mr2dnew(char *uplo, char *diag, Int *m, Int *n, complex *A, Int *ia, Int *ja, Int desc_A[DESCLEN],
271 complex *B, Int *ib, Int *jb, Int desc_B[DESCLEN], Int *gcontext)
272{
273 Cpctrmr2d(uplo, diag, *m, *n, A, *ia, *ja, (MDESC *) desc_A,
274 B, *ib, *jb, (MDESC *) desc_B, *gcontext);
275 return;
276}
282void
283Cpctrmr2do(uplo, diag, m, n,
284 ptrmyblock, ia, ja, ma,
285 ptrmynewblock, ib, jb, mb)
286 char *uplo, *diag;
287 complex *ptrmyblock, *ptrmynewblock;
288/* pointers to the memory location of the matrix and the redistributed matrix */
289 MDESC *ma;
290 MDESC *mb;
291 Int ia, ja, ib, jb, m, n;
292{
293 Int dummy, nprocs;
294 Int gcontext;
295 /* first we initialize a global grid which serve as a reference to
296 * communicate from grid a to grid b */
297 Cblacs_pinfo(&dummy, &nprocs);
298 Cblacs_get(0, 0, &gcontext);
299 Cblacs_gridinit(&gcontext, "R", (Int)1, nprocs);
300 Cpctrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma,
301 ptrmynewblock, ib, jb, mb, gcontext);
302 Cblacs_gridexit(gcontext);
303}
304#define NBPARAM 20 /* p0,q0,p1,q1, puis ma,na,mba,nba,rowa,cola puis
305 * idem B puis ia,ja puis ib,jb */
306#define MAGIC_MAX 100000000
307void
308Cpctrmr2d(uplo, diag, m, n,
309 ptrmyblock, ia, ja, ma,
310 ptrmynewblock, ib, jb, mb, globcontext)
311 char *uplo, *diag;
312 complex *ptrmyblock, *ptrmynewblock;
313/* pointers to the memory location of the matrix and the redistributed matrix */
314 MDESC *ma;
315 MDESC *mb;
316 Int ia, ja, ib, jb, m, n, globcontext;
317{
318 complex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
319 complex *recvptr;
320 MDESC newa, newb;
321 Int *proc0, *proc1, *param;
322 Int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
323 Int i, j;
324 Int nprow, npcol, gcontext;
325 Int recvsize, sendsize;
326 IDESC *h_inter; /* to store the horizontal intersections */
327 IDESC *v_inter; /* to store the vertical intersections */
328 Int hinter_nb, vinter_nb; /* number of intrsections in both directions */
329 Int dummy;
330 Int p0, q0, p1, q1;
331 Int *ra, *ca;
332 /* end of variables */
333 /* To simplify further calcul we change the matrix indexation from
334 * 1..m,1..n (fortran) to 0..m-1,0..n-1 */
335 if (m == 0 || n == 0)
336 return;
337 ia -= 1;
338 ja -= 1;
339 ib -= 1;
340 jb -= 1;
341 Cblacs_gridinfo(globcontext, &nprow, &npcol, &dummy, &mypnum);
342 gcontext = globcontext;
343 nprocs = nprow * npcol;
344 /* if the global context that is given to us has not the shape of a line
345 * (nprow != 1), create a new context. TODO: to be optimal, we should
346 * avoid this because it is an uncessary synchronisation */
347 if (nprow != 1) {
348 gridreshape(&gcontext);
349 Cblacs_gridinfo(gcontext, &dummy, &dummy, &dummy, &mypnum);
350 }
351 Cblacs_gridinfo(ma->ctxt, &p0, &q0, &myprow0, &mypcol0);
352 /* compatibility T3D, must check myprow and mypcol are within bounds */
353 if (myprow0 >= p0 || mypcol0 >= q0)
354 myprow0 = mypcol0 = -1;
355 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
356 Cblacs_gridinfo(mb->ctxt, &p1, &q1, &myprow1, &mypcol1);
357 if (myprow1 >= p1 || mypcol1 >= q1)
358 myprow1 = mypcol1 = -1;
359 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
360 /* exchange the missing parameters among the processors: shape of grids and
361 * location of the processors */
362 param = (Int *) mr2d_malloc(3 * (nprocs * 2 + NBPARAM) * sizeof(Int));
363 ra = param + nprocs * 2 + NBPARAM;
364 ca = param + (nprocs * 2 + NBPARAM) * 2;
365 for (i = 0; i < nprocs * 2 + NBPARAM; i++)
366 param[i] = MAGIC_MAX;
367 proc0 = param + NBPARAM;
368 proc1 = param + NBPARAM + nprocs;
369 /* we calulate proc0 and proc1 that will give the number of a proc in
370 * respectively a or b in the global context */
371 if (myprow0 >= 0) {
372 proc0[myprow0 * q0 + mypcol0] = mypnum;
373 param[0] = p0;
374 param[1] = q0;
375 param[4] = ma->m;
376 param[5] = ma->n;
377 param[6] = ma->nbrow;
378 param[7] = ma->nbcol;
379 param[8] = ma->sprow;
380 param[9] = ma->spcol;
381 param[10] = ia;
382 param[11] = ja;
383 }
384 if (myprow1 >= 0) {
385 proc1[myprow1 * q1 + mypcol1] = mypnum;
386 param[2] = p1;
387 param[3] = q1;
388 param[12] = mb->m;
389 param[13] = mb->n;
390 param[14] = mb->nbrow;
391 param[15] = mb->nbcol;
392 param[16] = mb->sprow;
393 param[17] = mb->spcol;
394 param[18] = ib;
395 param[19] = jb;
396 }
397 Cigamn2d(gcontext, "All", "H", 2 * nprocs + NBPARAM, (Int)1, param, 2 * nprocs + NBPARAM,
398 ra, ca, 2 * nprocs + NBPARAM, (Int)-1, (Int)-1);
399 newa = *ma;
400 newb = *mb;
401 ma = &newa;
402 mb = &newb;
403 if (myprow0 == -1) {
404 p0 = param[0];
405 q0 = param[1];
406 ma->m = param[4];
407 ma->n = param[5];
408 ma->nbrow = param[6];
409 ma->nbcol = param[7];
410 ma->sprow = param[8];
411 ma->spcol = param[9];
412 ia = param[10];
413 ja = param[11];
414 }
415 if (myprow1 == -1) {
416 p1 = param[2];
417 q1 = param[3];
418 mb->m = param[12];
419 mb->n = param[13];
420 mb->nbrow = param[14];
421 mb->nbcol = param[15];
422 mb->sprow = param[16];
423 mb->spcol = param[17];
424 ib = param[18];
425 jb = param[19];
426 }
427 for (i = 0; i < NBPARAM; i++) {
428 if (param[i] == MAGIC_MAX) {
429 fprintf(stderr, "xxGEMR2D:something wrong in the parameters\n");
430 exit(1);
431 }
432 }
433#ifndef NDEBUG
434 for (i = 0; i < p0 * q0; i++)
435 assert(proc0[i] >= 0 && proc0[i] < nprocs);
436 for (i = 0; i < p1 * q1; i++)
437 assert(proc1[i] >= 0 && proc1[i] < nprocs);
438#endif
439 /* check the validity of the parameters */
440 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
441 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
442 /* we change the problem so that ia < a->nbrow ... andia + m = a->m ... */
443 {
444 Int decal;
445 ia = changeorigin(myprow0, ma->sprow, p0,
446 ma->nbrow, ia, &decal, &ma->sprow);
447 ptrmyblock += decal;
448 ja = changeorigin(mypcol0, ma->spcol, q0,
449 ma->nbcol, ja, &decal, &ma->spcol);
450 ptrmyblock += decal * ma->lda;
451 ma->m = ia + m;
452 ma->n = ja + n;
453 ib = changeorigin(myprow1, mb->sprow, p1,
454 mb->nbrow, ib, &decal, &mb->sprow);
455 ptrmynewblock += decal;
456 jb = changeorigin(mypcol1, mb->spcol, q1,
457 mb->nbcol, jb, &decal, &mb->spcol);
458 ptrmynewblock += decal * mb->lda;
459 mb->m = ib + m;
460 mb->n = jb + n;
461 if (p0 == 1)
462 ma->nbrow = ma->m;
463 if (q0 == 1)
464 ma->nbcol = ma->n;
465 if (p1 == 1)
466 mb->nbrow = mb->m;
467 if (q1 == 1)
468 mb->nbcol = mb->n;
469#ifndef NDEBUG
470 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
471 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
472#endif
473 }
474 /* We compute the size of the memory buffer ( we choose the worst case,
475 * when the buffer sizes == the memory block sizes). */
476 if (myprow0 >= 0 && mypcol0 >= 0) {
477 /* Initialize pointer variables */
478 setmemory(&ptrsendbuff, memoryblocksize(ma));
479 }; /* if (mypnum < p0 * q0) */
480 if (myprow1 >= 0 && mypcol1 >= 0) {
481 /* Initialize pointer variables */
482 setmemory(&ptrrecvbuff, memoryblocksize(mb));
483 }; /* if (mypnum < p1 * q1) */
484 /* allocing room for the tabs, alloc for the worst case,local_n or local_m
485 * intervals, in fact the worst case should be less, perhaps half that,I
486 * should think of that one day. */
487 h_inter = (IDESC *) mr2d_malloc(DIVUP(ma->n, q0 * ma->nbcol) *
488 ma->nbcol * sizeof(IDESC));
489 v_inter = (IDESC *) mr2d_malloc(DIVUP(ma->m, p0 * ma->nbrow)
490 * ma->nbrow * sizeof(IDESC));
491 /* We go for the scanning of indices. For each processor including mypnum,
492 * we fill the sendbuff buffer (scanD0(SENDBUFF)) and when it is done send
493 * it. Then for each processor, we compute the size of message to be
494 * receive scanD0(SIZEBUFF)), post a receive and then allocate the elements
495 * of recvbuff the right place (scanD)(RECVBUFF)) */
496 recvptr = ptrrecvbuff;
497 {
498 Int tot, myrang, step, sens;
499 Int *sender, *recver;
500 Int mesending, merecving;
501 tot = max(p0 * q0, p1 * q1);
502 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
503 &sender, &recver, &myrang);
504 if (myrang == -1)
505 goto after_comm;
506 mesending = myprow0 >= 0;
507 assert(sender[myrang] >= 0 || !mesending);
508 assert(!mesending || proc0[sender[myrang]] == mypnum);
509 merecving = myprow1 >= 0;
510 assert(recver[myrang] >= 0 || !merecving);
511 assert(!merecving || proc1[recver[myrang]] == mypnum);
512 step = tot - 1 - myrang;
513 do {
514 for (sens = 0; sens < 2; sens++) {
515 /* be careful here, when we communicating with ourselves, we must
516 * send first (myrang > step == 0) */
517 if (mesending && recver[step] >= 0 &&
518 (sens == 0)) {
519 i = recver[step] / q1;
520 j = recver[step] % q1;
521 vinter_nb = scan_intervals('r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
522 v_inter);
523 hinter_nb = scan_intervals('c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
524 h_inter);
525 scanD0(uplo, diag, SENDBUFF, ptrsendbuff, &sendsize,
526 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
527 v_inter, vinter_nb, h_inter, hinter_nb,
528 ptrmyblock);
529 } /* if (mesending...) { */
530 if (mesending && recver[step] >= 0 &&
531 (sens == myrang > step)) {
532 i = recver[step] / q1;
533 j = recver[step] % q1;
534 if (sendsize > 0
535 && (step != myrang || !merecving)
536 ) {
537 Ccgesd2d(gcontext, sendsize, (Int)1, ptrsendbuff, sendsize,
538 (Int)0, proc1[i * q1 + j]);
539 } /* sendsize > 0 */
540 } /* if (mesending ... */
541 if (merecving && sender[step] >= 0 &&
542 (sens == myrang <= step)) {
543 i = sender[step] / q0;
544 j = sender[step] % q0;
545 vinter_nb = scan_intervals('r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
546 v_inter);
547 hinter_nb = scan_intervals('c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
548 h_inter);
549 scanD0(uplo, diag, SIZEBUFF, ptrNULL, &recvsize,
550 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
551 v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL);
552 if (recvsize > 0) {
553 if (step == myrang && mesending) {
554 Clacpy(recvsize, 1,
555 ptrsendbuff, recvsize,
556 ptrrecvbuff, recvsize);
557 } else {
558 Ccgerv2d(gcontext, recvsize, (Int)1, ptrrecvbuff, recvsize,
559 (Int)0, proc0[i * q0 + j]);
560 }
561 } /* recvsize > 0 */
562 } /* if (merecving ...) */
563 if (merecving && sender[step] >= 0 && sens == 1) {
564 scanD0(uplo, diag, RECVBUFF, ptrrecvbuff, &recvsize,
565 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
566 v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock);
567 } /* if (merecving...) */
568 } /* for (sens = 0) */
569 step -= 1;
570 if (step < 0)
571 step = tot - 1;
572 } while (step != tot - 1 - myrang);
573after_comm:
574 free(sender);
575 } /* { int tot,nr,ns ...} */
576 /* don't forget to clean up things! */
577 if (myprow1 >= 0 && mypcol1 >= 0) {
578 freememory((char *) ptrrecvbuff);
579 };
580 if (myprow0 >= 0 && mypcol0 >= 0) {
581 freememory((char *) ptrsendbuff);
582 };
583 if (nprow != 1)
584 Cblacs_gridexit(gcontext);
585 free(v_inter);
586 free(h_inter);
587 free(param);
588}/* distrib */
590init_chenille(Int mypnum, Int nprocs, Int n0, Int *proc0, Int n1, Int *proc1, Int **psend, Int **precv, Int *myrang)
591{
592 Int ns, nr, i, tot;
593 Int *sender, *recver, *g0, *g1;
594 tot = max(n0, n1);
595 sender = (Int *) mr2d_malloc((nprocs + tot) * sizeof(Int) * 2);
596 recver = sender + tot;
597 *psend = sender;
598 *precv = recver;
599 g0 = recver + tot;
600 g1 = g0 + nprocs;
601 for (i = 0; i < nprocs; i++) {
602 g0[i] = -1;
603 g1[i] = -1;
604 }
605 for (i = 0; i < tot; i++) {
606 sender[i] = -1;
607 recver[i] = -1;
608 }
609 for (i = 0; i < n0; i++)
610 g0[proc0[i]] = i;
611 for (i = 0; i < n1; i++)
612 g1[proc1[i]] = i;
613 ns = 0;
614 nr = 0;
615 *myrang = -1;
616 for (i = 0; i < nprocs; i++)
617 if (g0[i] >= 0 && g1[i] >= 0) {
618 if (i == mypnum)
619 *myrang = nr;
620 sender[ns] = g0[i];
621 ns += 1;
622 recver[nr] = g1[i];
623 nr += 1;
624 assert(ns <= n0 && nr <= n1 && nr == ns);
625 }
626 for (i = 0; i < nprocs; i++)
627 if (g0[i] >= 0 && g1[i] < 0) {
628 if (i == mypnum)
629 *myrang = ns;
630 sender[ns] = g0[i];
631 ns += 1;
632 assert(ns <= n0);
633 }
634 for (i = 0; i < nprocs; i++)
635 if (g1[i] >= 0 && g0[i] < 0) {
636 if (i == mypnum)
637 *myrang = nr;
638 recver[nr] = g1[i];
639 nr += 1;
640 assert(nr <= n1);
641 }
642}
643void
644Clacpy(Int m, Int n, complex *a, Int lda, complex *b, Int ldb)
645{
646 Int i, j;
647 lda -= m;
648 ldb -= m;
649 assert(lda >= 0 && ldb >= 0);
650 for (j = 0; j < n; j++) {
651 for (i = 0; i < m; i++)
652 *b++ = *a++;
653 b += ldb;
654 a += lda;
655 }
656}
658gridreshape(Int *ctxtp)
659{
660 Int ori, final; /* original context, and new context created, with
661 * line form */
662 Int nprow, npcol, myrow, mycol;
663 Int *usermap;
664 Int i, j;
665 ori = *ctxtp;
666 Cblacs_gridinfo(ori, &nprow, &npcol, &myrow, &mycol);
667 usermap = mr2d_malloc(sizeof(Int) * nprow * npcol);
668 for (i = 0; i < nprow; i++)
669 for (j = 0; j < npcol; j++) {
670 usermap[i + j * nprow] = Cblacs_pnum(ori, i, j);
671 }
672 /* Cblacs_get(0, 0, &final); */
673 Cblacs_get(ori, (Int)10, &final);
674 Cblacs_gridmap(&final, usermap, (Int)1, (Int)1, nprow * npcol);
675 *ctxtp = final;
676 free(usermap);
677}
#define Int
Definition Bconfig.h:22
void Cblacs_gridmap()
static2 Int inter_len()
Int memoryblocksize()
Int changeorigin()
#define freememory
Definition pctrmr.c:238
void Ccgesd2d()
#define scan_intervals
Definition pctrmr.c:239
void Cblacs_gridexit()
#define max(A, B)
Definition pctrmr.c:195
void Cigsum2d()
#define static2
Definition pctrmr.c:158
#define scanD0
Definition pctrmr.c:235
Int Cblacs_pnum()
void Cpctrmr2d()
static2 void gridreshape()
#define DIVUP(a, b)
Definition pctrmr.c:197
#define SIZEBUFF
Definition pctrmr.c:250
#define fortran_mr2dnew
Definition pctrmr.c:169
#define Clacpy
Definition pctrmr.c:173
void Cigamx2d()
void Cblacs_pinfo()
void Cpctrmr2do()
#define NBPARAM
Definition pctrmr.c:304
void Cblacs_pcoord()
void Cigamn2d()
Int localsize()
#define setmemory
Definition pctrmr.c:237
void Cblacs_get()
void Ccgerv2d()
void Cigerv2d()
void Cigebs2d()
#define dispmat
Definition pctrmr.c:236
static2 void buff2block()
#define SENDBUFF
Definition pctrmr.c:248
void paramcheck()
void Cblacs_setup()
#define RECVBUFF
Definition pctrmr.c:249
static2 Int block2buff()
void Cblacs_gridinit()
void Cblacs_gridinfo()
#define MAGIC_MAX
Definition pctrmr.c:305
void Csetpvmtids()
Int localindice()
void Cigesd2d()
#define DESCLEN
Definition pctrmr.c:260
Int ppcm()
void Cigebr2d()
#define clacpy_
Definition pctrmr.c:171
void Cblacs_exit()
#define fortran_mr2d
Definition pctrmr.c:168
void * mr2d_malloc()
static2 void init_chenille()
Int gstart
Definition pctrmr.c:191
Int m
Definition pcgemr.c:166
Int spcol
Definition pcgemr.c:171
Int nbcol
Definition pcgemr.c:169
Int sprow
Definition pcgemr.c:170
Int nbrow
Definition pcgemr.c:168
Int ctxt
Definition pcgemr.c:165
Int n
Definition pcgemr.c:167
Int lda
Definition pcgemr.c:172