158 #define static2 static
159 #if defined(Add_) || defined(f77IsF2C)
160 #define fortran_mr2d pstrmr2do_
161 #define fortran_mr2dnew pstrmr2d_
162 #elif defined(UpCase)
163 #define fortran_mr2dnew PSTRMR2D
164 #define fortran_mr2d PSTRMR2DO
166 #define slacpy_ SLACPY
168 #define fortran_mr2d pstrmr2do
169 #define fortran_mr2dnew pstrmr2d
171 #define slacpy_ slacpy
173 #define Clacpy Cstrlacpy
186 #define BLOCK_CYCLIC_2D 1
191 #define SHIFT(row,sprow,nbrow) ((row)-(sprow)+ ((row) >= (sprow) ? 0 : (nbrow)))
192 #define max(A,B) ((A)>(B)?(A):(B))
193 #define min(A,B) ((A)>(B)?(B):(A))
194 #define DIVUP(a,b) ( ((a)-1) /(b)+1)
195 #define ROUNDUP(a,b) (DIVUP(a,b)*(b))
197 #define malloc mymalloc
199 #define realloc myrealloc
232 #define scanD0 strscanD0
233 #define dispmat strdispmat
234 #define setmemory strsetmemory
235 #define freememory strfreememory
236 #define scan_intervals strscan_intervals
262 int *ia, *ib, *ja, *jb, *m, *n;
267 B, *ib, *jb, (
MDESC *) desc_B);
272 B, ib, jb, desc_B, gcontext)
274 int *ia, *ib, *ja, *jb, *m, *n;
280 B, *ib, *jb, (
MDESC *) desc_B, *gcontext);
290 ptrmyblock, ia, ja, ma,
291 ptrmynewblock, ib, jb, mb)
293 float *ptrmyblock, *ptrmynewblock;
297 int ia, ja, ib, jb, m, n;
306 Cpstrmr2d(uplo, diag, m, n, ptrmyblock, ia, ja, ma,
307 ptrmynewblock, ib, jb, mb, gcontext);
312 #define MAGIC_MAX 100000000
315 ptrmyblock, ia, ja, ma,
316 ptrmynewblock, ib, jb, mb, globcontext)
318 float *ptrmyblock, *ptrmynewblock;
322 int ia, ja, ib, jb, m, n, globcontext;
324 float *ptrsendbuff, *ptrrecvbuff, *ptrNULL = 0;
327 int *proc0, *proc1, *param;
328 int mypnum, myprow0, mypcol0, myprow1, mypcol1, nprocs;
330 int nprow, npcol, gcontext;
331 int recvsize, sendsize;
334 int hinter_nb, vinter_nb;
341 if (m == 0 || n == 0)
348 gcontext = globcontext;
349 nprocs = nprow * npcol;
359 if (myprow0 >= p0 || mypcol0 >= q0)
360 myprow0 = mypcol0 = -1;
361 assert((myprow0 < p0 && mypcol0 < q0) || (myprow0 == -1 && mypcol0 == -1));
363 if (myprow1 >= p1 || mypcol1 >= q1)
364 myprow1 = mypcol1 = -1;
365 assert((myprow1 < p1 && mypcol1 < q1) || (myprow1 == -1 && mypcol1 == -1));
369 ra = param + nprocs * 2 +
NBPARAM;
370 ca = param + (nprocs * 2 +
NBPARAM) * 2;
371 for (i = 0; i < nprocs * 2 +
NBPARAM; i++)
374 proc1 = param +
NBPARAM + nprocs;
378 proc0[myprow0 * q0 + mypcol0] = mypnum;
383 param[6] = ma->
nbrow;
384 param[7] = ma->
nbcol;
385 param[8] = ma->
sprow;
386 param[9] = ma->
spcol;
391 proc1[myprow1 * q1 + mypcol1] = mypnum;
396 param[14] = mb->
nbrow;
397 param[15] = mb->
nbcol;
398 param[16] = mb->
sprow;
399 param[17] = mb->
spcol;
404 ra, ca, 2 * nprocs +
NBPARAM, -1, -1);
414 ma->
nbrow = param[6];
415 ma->
nbcol = param[7];
416 ma->
sprow = param[8];
417 ma->
spcol = param[9];
426 mb->
nbrow = param[14];
427 mb->
nbcol = param[15];
428 mb->
sprow = param[16];
429 mb->
spcol = param[17];
433 for (i = 0; i <
NBPARAM; i++) {
435 fprintf(stderr,
"xxGEMR2D:something wrong in the parameters\n");
440 for (i = 0; i < p0 * q0; i++)
441 assert(proc0[i] >= 0 && proc0[i] < nprocs);
442 for (i = 0; i < p1 * q1; i++)
443 assert(proc1[i] >= 0 && proc1[i] < nprocs);
446 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
447 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
456 ptrmyblock += decal * ma->
lda;
461 ptrmynewblock += decal;
464 ptrmynewblock += decal * mb->
lda;
476 paramcheck(ma, ia, ja, m, n, p0, q0, gcontext);
477 paramcheck(mb, ib, jb, m, n, p1, q1, gcontext);
482 if (myprow0 >= 0 && mypcol0 >= 0) {
486 if (myprow1 >= 0 && mypcol1 >= 0) {
502 recvptr = ptrrecvbuff;
504 int tot, myrang, step, sens;
505 int *sender, *recver;
506 int mesending, merecving;
507 tot =
max(p0 * q0, p1 * q1);
508 init_chenille(mypnum, nprocs, p0 * q0, proc0, p1 * q1, proc1,
509 &sender, &recver, &myrang);
512 mesending = myprow0 >= 0;
513 assert(sender[myrang] >= 0 || !mesending);
514 assert(!mesending || proc0[sender[myrang]] == mypnum);
515 merecving = myprow1 >= 0;
516 assert(recver[myrang] >= 0 || !merecving);
517 assert(!merecving || proc1[recver[myrang]] == mypnum);
518 step = tot - 1 - myrang;
520 for (sens = 0; sens < 2; sens++) {
523 if (mesending && recver[step] >= 0 &&
525 i = recver[step] / q1;
526 j = recver[step] % q1;
527 vinter_nb =
scan_intervals(
'r', ia, ib, m, ma, mb, p0, p1, myprow0, i,
529 hinter_nb =
scan_intervals(
'c', ja, jb, n, ma, mb, q0, q1, mypcol0, j,
532 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
533 v_inter, vinter_nb, h_inter, hinter_nb,
536 if (mesending && recver[step] >= 0 &&
537 (sens == myrang > step)) {
538 i = recver[step] / q1;
539 j = recver[step] % q1;
541 && (step != myrang || !merecving)
543 Csgesd2d(gcontext, sendsize, 1, ptrsendbuff, sendsize,
544 0, proc1[i * q1 + j]);
547 if (merecving && sender[step] >= 0 &&
548 (sens == myrang <= step)) {
549 i = sender[step] / q0;
550 j = sender[step] % q0;
551 vinter_nb =
scan_intervals(
'r', ib, ia, m, mb, ma, p1, p0, myprow1, i,
553 hinter_nb =
scan_intervals(
'c', jb, ja, n, mb, ma, q1, q0, mypcol1, j,
556 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
557 v_inter, vinter_nb, h_inter, hinter_nb, ptrNULL);
559 if (step == myrang && mesending) {
561 ptrsendbuff, recvsize,
562 ptrrecvbuff, recvsize);
564 Csgerv2d(gcontext, recvsize, 1, ptrrecvbuff, recvsize,
565 0, proc0[i * q0 + j]);
569 if (merecving && sender[step] >= 0 && sens == 1) {
571 m, n, ma, ia, ja, p0, q0, mb, ib, jb, p1, q1,
572 v_inter, vinter_nb, h_inter, hinter_nb, ptrmynewblock);
578 }
while (step != tot - 1 - myrang);
583 if (myprow1 >= 0 && mypcol1 >= 0) {
586 if (myprow0 >= 0 && mypcol0 >= 0) {
597 int nprocs, mypnum, n0, n1;
598 int *proc0, *proc1, **psend, **precv, *myrang;
601 int *sender, *recver, *g0, *g1;
603 sender = (
int *)
mr2d_malloc((nprocs + tot) *
sizeof(int) * 2);
604 recver = sender + tot;
609 for (i = 0; i < nprocs; i++) {
613 for (i = 0; i < tot; i++) {
617 for (i = 0; i < n0; i++)
619 for (i = 0; i < n1; i++)
624 for (i = 0; i < nprocs; i++)
625 if (g0[i] >= 0 && g1[i] >= 0) {
632 assert(ns <= n0 && nr <= n1 && nr == ns);
634 for (i = 0; i < nprocs; i++)
635 if (g0[i] >= 0 && g1[i] < 0) {
642 for (i = 0; i < nprocs; i++)
643 if (g1[i] >= 0 && g0[i] < 0) {
659 assert(lda >= 0 && ldb >= 0);
660 for (j = 0; j < n; j++) {
661 for (i = 0; i < m; i++)
673 int nprow, npcol, myrow, mycol;
678 usermap =
mr2d_malloc(
sizeof(
int) * nprow * npcol);
679 for (i = 0; i < nprow; i++)
680 for (j = 0; j < npcol; j++) {