158 #define static2 static
159 #if defined(Add_) || defined(f77IsF2C)
160 #define fortran_mr2d pztrmr2do_
161 #define fortran_mr2dnew pztrmr2d_
162 #elif defined(UpCase)
163 #define fortran_mr2dnew PZTRMR2D
164 #define fortran_mr2d PZTRMR2DO
166 #define zlacpy_ ZLACPY
168 #define fortran_mr2d pztrmr2do
169 #define fortran_mr2dnew pztrmr2d
171 #define zlacpy_ zlacpy
173 #define Clacpy Cztrlacpy
189 #define BLOCK_CYCLIC_2D 1
194 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
195 #define max(A,B) ((A)>(B)?(A):(B))
196 #define min(A,B) ((A)>(B)?(B):(A))
197 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
198 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
200 #define malloc mymalloc
202 #define realloc myrealloc
235 #define scanD0 ztrscanD0
236 #define dispmat ztrdispmat
237 #define setmemory ztrsetmemory
238 #define freememory ztrfreememory
239 #define scan_intervals ztrscan_intervals
265 int *ia, *ib, *ja, *jb, *m, *n;
270 B, *ib, *jb, (
MDESC *) desc_B);
275 B, ib, jb, desc_B, gcontext)
277 int *ia, *ib, *ja, *jb, *m, *n;
283 B, *ib, *jb, (
MDESC *) desc_B, *gcontext);
293 ptrmyblock, ia, ja, ma,
294 ptrmynewblock, ib, jb, mb)
296 dcomplex *ptrmyblock, *ptrmynewblock;
300 int ia, ja, ib, jb, m, n;
309 Cpztrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma,
310 ptrmynewblock, ib, jb, mb, gcontext);
315 #define MAGIC_MAX 100000000
318 ptrmyblock, ia, ja, ma,
319 ptrmynewblock, ib, jb, mb, globcontext)
321 dcomplex *ptrmyblock, *ptrmynewblock;
325 int ia, ja, ib, jb, m, n, globcontext;
327 dcomplex *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
330 int *proc0, *proc1, *param;
331 int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
333 int nprow, npcol, gcontext;
334 int recvsize, sendsize;
337 int hinter_nb, vinter_nb;
344 if (m == 0 || n == 0)
351 gcontext = globcontext;
352 nprocs = nprow * npcol;
362 if (myprow0 >= p0 || mypcol0 >= q0)
363 myprow0 = mypcol0 = -1;
364 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
366 if (myprow1 >= p1 || mypcol1 >= q1)
367 myprow1 = mypcol1 = -1;
368 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
372 ra = param + nprocs * 2 +
NBPARAM;
373 ca = param + (nprocs * 2 +
NBPARAM) * 2;
374 for (i = 0; i < nprocs * 2 +
NBPARAM; i++)
377 proc1 = param +
NBPARAM + nprocs;
381 proc0[myprow0 * q0 + mypcol0] = mypnum;
386 param[6] = ma->
nbrow;
387 param[7] = ma->
nbcol;
388 param[8] = ma->
sprow;
389 param[9] = ma->
spcol;
394 proc1[myprow1 * q1 + mypcol1] = mypnum;
399 param[14] = mb->
nbrow;
400 param[15] = mb->
nbcol;
401 param[16] = mb->
sprow;
402 param[17] = mb->
spcol;
407 ra, ca, 2 * nprocs +
NBPARAM, -1, -1);
417 ma->
nbrow = param[6];
418 ma->
nbcol = param[7];
419 ma->
sprow = param[8];
420 ma->
spcol = param[9];
429 mb->
nbrow = param[14];
430 mb->
nbcol = param[15];
431 mb->
sprow = param[16];
432 mb->
spcol = param[17];
436 for (i = 0; i <
NBPARAM; i++) {
438 fprintf(stderr,
"xxGEMR2D:something wrong in the parameters\n");
443 for (i = 0; i < p0 * q0; i++)
444 assert(proc0[i] >= 0 && proc0[i] < nprocs);
445 for (i = 0; i < p1 * q1; i++)
446 assert(proc1[i] >= 0 && proc1[i] < nprocs);
449 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
450 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
459 ptrmyblock += decal * ma->
lda;
464 ptrmynewblock += decal;
467 ptrmynewblock += decal * mb->
lda;
479 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
480 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
485 if (myprow0 >= 0 && mypcol0 >= 0) {
489 if (myprow1 >= 0 && mypcol1 >= 0) {
505 recvptr = ptrrecvbuff;
507 int tot, myrang, step, sens;
508 int *sender, *recver;
509 int mesending, merecving;
510 tot =
max(p0 * q0, p1 * q1);
511 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
512 &sender, &recver, &myrang);
515 mesending = myprow0 >= 0;
516 assert(sender[myrang] >= 0 || !mesending);
517 assert(!mesending || proc0[sender[myrang]] == mypnum);
518 merecving = myprow1 >= 0;
519 assert(recver[myrang] >= 0 || !merecving);
520 assert(!merecving || proc1[recver[myrang]] == mypnum);
521 step = tot - 1 - myrang;
523 for (sens = 0; sens < 2; sens++) {
526 if (mesending && recver[step] >= 0 &&
528 i = recver[step] / q1;
529 j = recver[step] % q1;
530 vinter_nb =
scan_intervals(
'r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
532 hinter_nb =
scan_intervals(
'c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
535 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
536 v_inter, vinter_nb, h_inter, hinter_nb,
539 if (mesending && recver[step] >= 0 &&
540 (sens == myrang > step)) {
541 i = recver[step] / q1;
542 j = recver[step] % q1;
544 && (step != myrang || !merecving)
546 Czgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize,
547 0, proc1[i * q1 + j]);
550 if (merecving && sender[step] >= 0 &&
551 (sens == myrang <= step)) {
552 i = sender[step] / q0;
553 j = sender[step] % q0;
554 vinter_nb =
scan_intervals(
'r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
556 hinter_nb =
scan_intervals(
'c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
559 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
560 v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL);
562 if (step == myrang && mesending) {
564 ptrsendbuff, recvsize,
565 ptrrecvbuff, recvsize);
567 Czgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize,
568 0, proc0[i * q0 + j]);
572 if (merecving && sender[step] >= 0 && sens == 1) {
574 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
575 v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock);
581 }
while (step != tot - 1 - myrang);
586 if (myprow1 >= 0 && mypcol1 >= 0) {
589 if (myprow0 >= 0 && mypcol0 >= 0) {
600 int nprocs, mypnum, n0, n1;
601 int *proc0, *proc1, **psend, **precv, *myrang;
604 int *sender, *recver, *g0, *g1;
606 sender = (
int *)
mr2d_malloc((nprocs + tot) *
sizeof(int) * 2);
607 recver = sender + tot;
612 for (i = 0; i < nprocs; i++) {
616 for (i = 0; i < tot; i++) {
620 for (i = 0; i < n0; i++)
622 for (i = 0; i < n1; i++)
627 for (i = 0; i < nprocs; i++)
628 if (g0[i] >= 0 && g1[i] >= 0) {
635 assert(ns <= n0 && nr <= n1 && nr == ns);
637 for (i = 0; i < nprocs; i++)
638 if (g0[i] >= 0 && g1[i] < 0) {
645 for (i = 0; i < nprocs; i++)
646 if (g1[i] >= 0 && g0[i] < 0) {
662 assert(lda >= 0 && ldb >= 0);
663 for (j = 0; j < n; j++) {
664 for (i = 0; i < m; i++)
676 int nprow, npcol, myrow, mycol;
681 usermap =
mr2d_malloc(
sizeof(
int) * nprow * npcol);
682 for (i = 0; i < nprow; i++)
683 for (j = 0; j < npcol; j++) {