143 #define static2 static
144 #if defined(Add_) || defined(f77IsF2C)
145 #define fortran_mr2d pcgemr2do_
146 #define fortran_mr2dnew pcgemr2d_
147 #elif defined(UpCase)
148 #define fortran_mr2dnew PCGEMR2D
149 #define fortran_mr2d PCGEMR2DO
151 #define clacpy_ CLACPY
153 #define fortran_mr2d pcgemr2do
154 #define fortran_mr2dnew pcgemr2d
156 #define clacpy_ clacpy
158 #define Clacpy Ccgelacpy
174 #define BLOCK_CYCLIC_2D 1
179 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
180 #define max(A,B) ((A)>(B)?(A):(B))
181 #define min(A,B) ((A)>(B)?(B):(A))
182 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
183 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
185 #define malloc mymalloc
187 #define realloc myrealloc
220 #define scanD0 cgescanD0
221 #define dispmat cgedispmat
222 #define setmemory cgesetmemory
223 #define freememory cgefreememory
224 #define scan_intervals cgescan_intervals
249 int *ia, *ib, *ja, *jb, *m, *n;
254 B, *ib, *jb, (
MDESC *) desc_B);
259 B, ib, jb, desc_B, gcontext)
260 int *ia, *ib, *ja, *jb, *m, *n;
266 B, *ib, *jb, (
MDESC *) desc_B, *gcontext);
276 ptrmyblock, ia, ja, ma,
277 ptrmynewblock, ib, jb, mb)
278 complex *ptrmyblock, *ptrmynewblock;
282 int ia, ja, ib, jb, m, n;
292 ptrmynewblock, ib, jb, mb, gcontext);
297 #define MAGIC_MAX 100000000
300 ptrmyblock, ia, ja, ma,
301 ptrmynewblock, ib, jb, mb, globcontext)
302 complex *ptrmyblock, *ptrmynewblock;
306 int ia, ja, ib, jb, m, n, globcontext;
308 complex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
311 int *proc0, *proc1, *param;
312 int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
314 int nprow, npcol, gcontext;
315 int recvsize, sendsize;
318 int hinter_nb, vinter_nb;
325 if (m == 0 || n == 0)
332 gcontext = globcontext;
333 nprocs = nprow * npcol;
343 if (myprow0 >= p0 || mypcol0 >= q0)
344 myprow0 = mypcol0 = -1;
345 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
347 if (myprow1 >= p1 || mypcol1 >= q1)
348 myprow1 = mypcol1 = -1;
349 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
353 ra = param + nprocs * 2 +
NBPARAM;
354 ca = param + (nprocs * 2 +
NBPARAM) * 2;
355 for (i = 0; i < nprocs * 2 +
NBPARAM; i++)
358 proc1 = param +
NBPARAM + nprocs;
362 proc0[myprow0 * q0 + mypcol0] = mypnum;
367 param[6] = ma->
nbrow;
368 param[7] = ma->
nbcol;
369 param[8] = ma->
sprow;
370 param[9] = ma->
spcol;
375 proc1[myprow1 * q1 + mypcol1] = mypnum;
380 param[14] = mb->
nbrow;
381 param[15] = mb->
nbcol;
382 param[16] = mb->
sprow;
383 param[17] = mb->
spcol;
388 ra, ca, 2 * nprocs +
NBPARAM, -1, -1);
398 ma->
nbrow = param[6];
399 ma->
nbcol = param[7];
400 ma->
sprow = param[8];
401 ma->
spcol = param[9];
410 mb->
nbrow = param[14];
411 mb->
nbcol = param[15];
412 mb->
sprow = param[16];
413 mb->
spcol = param[17];
417 for (i = 0; i <
NBPARAM; i++) {
419 fprintf(stderr,
"xxGEMR2D:something wrong in the parameters\n");
424 for (i = 0; i < p0 * q0; i++)
425 assert(proc0[i] >= 0 && proc0[i] < nprocs);
426 for (i = 0; i < p1 * q1; i++)
427 assert(proc1[i] >= 0 && proc1[i] < nprocs);
430 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
431 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
440 ptrmyblock += decal * ma->
lda;
445 ptrmynewblock += decal;
448 ptrmynewblock += decal * mb->
lda;
460 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
461 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
466 if (myprow0 >= 0 && mypcol0 >= 0) {
470 if (myprow1 >= 0 && mypcol1 >= 0) {
486 recvptr = ptrrecvbuff;
488 int tot, myrang, step, sens;
489 int *sender, *recver;
490 int mesending, merecving;
491 tot =
max(p0 * q0, p1 * q1);
492 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
493 &sender, &recver, &myrang);
496 mesending = myprow0 >= 0;
497 assert(sender[myrang] >= 0 || !mesending);
498 assert(!mesending || proc0[sender[myrang]] == mypnum);
499 merecving = myprow1 >= 0;
500 assert(recver[myrang] >= 0 || !merecving);
501 assert(!merecving || proc1[recver[myrang]] == mypnum);
502 step = tot - 1 - myrang;
504 for (sens = 0; sens < 2; sens++) {
507 if (mesending && recver[step] >= 0 &&
509 i = recver[step] / q1;
510 j = recver[step] % q1;
511 vinter_nb =
scan_intervals(
'r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
513 hinter_nb =
scan_intervals(
'c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
515 sendsize =
block2buff(v_inter, vinter_nb, h_inter, hinter_nb,
516 ptrmyblock, ma, ptrsendbuff);
518 if (mesending && recver[step] >= 0 &&
519 (sens == myrang > step)) {
520 i = recver[step] / q1;
521 j = recver[step] % q1;
523 && (step != myrang || !merecving)
525 Ccgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize,
526 0, proc1[i * q1 + j]);
529 if (merecving && sender[step] >= 0 &&
530 (sens == myrang <= step)) {
531 i = sender[step] / q0;
532 j = sender[step] % q0;
533 vinter_nb =
scan_intervals(
'r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
535 hinter_nb =
scan_intervals(
'c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
537 recvsize =
inter_len(hinter_nb, h_inter, vinter_nb, v_inter);
539 if (step == myrang && mesending) {
541 ptrsendbuff, recvsize,
542 ptrrecvbuff, recvsize);
544 Ccgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize,
545 0, proc0[i * q0 + j]);
549 if (merecving && sender[step] >= 0 && sens == 1) {
550 buff2block(v_inter, vinter_nb, h_inter, hinter_nb,
551 recvptr, ptrmynewblock, mb);
557 }
while (step != tot - 1 - myrang);
562 if (myprow1 >= 0 && mypcol1 >= 0) {
565 if (myprow0 >= 0 && mypcol0 >= 0) {
576 int nprocs, mypnum, n0, n1;
577 int *proc0, *proc1, **psend, **precv, *myrang;
580 int *sender, *recver, *g0, *g1;
582 sender = (
int *)
mr2d_malloc((nprocs + tot) *
sizeof(int) * 2);
583 recver = sender + tot;
588 for (i = 0; i < nprocs; i++) {
592 for (i = 0; i < tot; i++) {
596 for (i = 0; i < n0; i++)
598 for (i = 0; i < n1; i++)
603 for (i = 0; i < nprocs; i++)
604 if (g0[i] >= 0 && g1[i] >= 0) {
611 assert(ns <= n0 && nr <= n1 && nr == ns);
613 for (i = 0; i < nprocs; i++)
614 if (g0[i] >= 0 && g1[i] < 0) {
621 for (i = 0; i < nprocs; i++)
622 if (g1[i] >= 0 && g0[i] < 0) {
630 #define Mlacpy(mo,no,ao,ldao,bo,ldbo) \
633 int _m,_n,_lda,_ldb; \
635 _m = (mo);_n = (no); \
636 _a = (ao);_b = (bo); \
637 _lda = (ldao) - _m; \
638 _ldb = (ldbo) - _m; \
639 assert(_lda >= 0 && _ldb >= 0); \
640 for (_j=0;_j<_n;_j++) { \
641 for (_i=0;_i<_m;_i++) \
657 for (h = 0; h < hinb; h++) {
659 for (v = 0; v < vinb; v++) {
660 Mlacpy(vi[v].len, hi[h].len,
663 buff + sizebuff, vi[v].
len);
664 sizebuff += hi[h].
len * vi[v].
len;
679 for (h = 0; h < hinb; h++) {
681 for (v = 0; v < vinb; v++) {
682 Mlacpy(vi[v].len, hi[h].len,
683 buff + sizebuff, vi[v].len,
686 sizebuff += hi[h].
len * vi[v].
len;
695 int hlen, vlen, h, v;
697 for (h = 0; h < hinb; h++)
700 for (v = 0; v < vinb; v++)
712 assert(lda >= 0 && ldb >= 0);
713 for (j = 0; j < n; j++) {
714 for (i = 0; i < m; i++)
726 int nprow, npcol, myrow, mycol;
731 usermap =
mr2d_malloc(
sizeof(
int) * nprow * npcol);
732 for (i = 0; i < nprow; i++)
733 for (j = 0; j < npcol; j++) {