*  Source listing: de/d61/pclacp3_8f_source.html

      SUBROUTINE PCLACP3( M, I, A, DESCA, B, LDB, II, JJ, REV )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     July 31, 2001
*
*     .. Scalar Arguments ..
      INTEGER            I, II, JJ, LDB, M, REV
*     ..
*     .. Array Arguments ..
      INTEGER            DESCA( * )
      COMPLEX            A( * ), B( LDB, * )
*     ..
*
*  Purpose
*  =======
*
*  PCLACP3 is an auxiliary routine that copies from a global parallel
*    array into a local replicated array or vice versa.  Notice that
*    the entire submatrix that is copied gets placed on one node or
*    more.  The receiving node can be specified precisely, or all nodes
*    can receive, or just one row or column of nodes.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block cyclicly distributed array.
*  Such a global array has an associated description vector DESCA.
*  In the following comments, the character _ should be read as
*  "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
*  This routine reads only CTXT_, MB_, RSRC_, CSRC_ and LLD_ from
*  DESCA.  The row blocking factor MB_A is used for both the rows and
*  the columns of A, i.e. square blocks (MB_A = NB_A) are assumed.
*  Block owners are located starting from process row RSRC_A and
*  process column CSRC_A.
*
*  Arguments
*  =========
*
*  M       (global input) INTEGER
*          M is the order of the square submatrix that is copied.
*          M >= 0.  The routine returns immediately if M <= 0.
*          Unchanged on exit.
*
*  I       (global input) INTEGER
*          A(I,I) is the global location that the copying starts from.
*          Unchanged on exit.
*
*  A       (global input/output) COMPLEX array, dimension
*          (DESCA(LLD_),*)
*          On entry, the parallel matrix to be copied into or from.
*          On exit, if REV<>0, A(I:I+M-1,I:I+M-1) holds the copied
*          data.
*          Unchanged on exit if REV=0.
*
*  B       (local input/output) COMPLEX array of size (LDB,M)
*          If REV=0, the leading M-by-M part of B is first set to
*             zero on every process.  On exit, on the receiving
*             node(s) it holds the global portion
*             A(I:I+M-1,I:I+M-1).  Each block owner also copies the
*             blocks it owns into its own B.
*          If REV<>0, on entry B on node (II,JJ) holds the data to be
*             copied into A; it is not modified there.  On any other
*             process that owns part of the submatrix, the received
*             blocks are stored in B before being copied into A.
*
*  LDB     (local input) INTEGER
*          The leading dimension of B.
*
*  II      (global input) INTEGER
*          By using REV 0 & 1, data can be sent out and returned again.
*          If REV=0, then II is destination row index for the node(s)
*             receiving the replicated B.
*             If II>=0,JJ>=0, then node (II,JJ) receives the data
*             If II=-1,JJ>=0, then all rows in column JJ receive the
*                             data
*             If II>=0,JJ=-1, then all cols in row II receive the data
*             If II=-1,JJ=-1, then all nodes receive the data
*          If REV<>0, then II is the source row index for the node(s)
*             sending the replicated B.
*
*  JJ      (global input) INTEGER
*          Similar description as II above
*
*  REV     (global input) INTEGER
*          Use REV = 0 to send global A into locally replicated B
*             (on node (II,JJ)).
*          Use REV <> 0 to send locally replicated B from node (II,JJ)
*             to its owner (which changes depending on its location in
*             A) into the global A.
*
*  Further Details
*  ===============
*
*  Implemented by:  M. Fahey, May 28, 1999
*
*  For REV = 0 with exactly one of II, JJ equal to -1, a block owner
*  broadcasts along its own process column (II = -1) or process row
*  (JJ = -1), while the matching receive is posted only on process
*  column JJ or process row II.  The caller must therefore make sure
*  that every copied block is owned by a process in that column or
*  row; otherwise the sends and receives do not match.
*
*  For REV <> 0 nothing is communicated unless II >= 0 and JJ >= 0:
*  each block owner copies from its own B.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
     $                   LLD_, MB_, M_, NB_, N_, RSRC_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                     CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                     RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
      COMPLEX            ZERO
      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
*     ..
*     .. Local Scalars ..
      INTEGER            COL, CONTXT, CSRC, HBL, IA, IB, ICOL1, ICOL2,
     $                   IDI, IDJ, IFIN, III, IROW1, IROW2, ISTOP,
     $                   ISTOPI, ISTOPJ, ITMP, JB, JJJ, LDA, MYCOL,
     $                   MYROW, NC, NPCOL, NPROW, NR, ROW, RSRC
*     ..
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, INFOG1L, CGEBR2D, CGEBS2D,
     $                   CGERV2D, CGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Quick return if there is nothing to copy.
*
      IF( M.LE.0 )
     $   RETURN
*
      HBL = DESCA( MB_ )
      CONTXT = DESCA( CTXT_ )
      LDA = DESCA( LLD_ )
      RSRC = DESCA( RSRC_ )
      CSRC = DESCA( CSRC_ )
*
      CALL BLACS_GRIDINFO( CONTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     When gathering (REV = 0), clear the leading M-by-M part of B.
*     The column index runs in the outer loop so that B is traversed
*     in storage order.
*
      IF( REV.EQ.0 ) THEN
         DO 20 IDJ = 1, M
            DO 10 IDI = 1, M
               B( IDI, IDJ ) = ZERO
   10       CONTINUE
   20    CONTINUE
      END IF
*
*     IFIN is the last global row/column index of the submatrix.
*
      IFIN = I + M - 1
*
*     ISTOP is the last global index of the first piece: the end of
*     the block containing I, or IFIN if the submatrix ends earlier.
*     When I is a multiple of HBL it is the last index of its block,
*     so the first piece consists of the single index I.
*
      IF( MOD( I+HBL, HBL ).NE.0 ) THEN
         ISTOP = MIN( I+HBL-MOD( I+HBL, HBL ), IFIN )
      ELSE
         ISTOP = I
      END IF
*
*     Sweep over the pieces (IDI:ISTOPI,IDJ:ISTOPJ).  Each piece lies
*     inside a single distribution block.  Label 30 heads the loop
*     over piece columns, label 40 the loop over piece rows.  Every
*     process walks through the pieces in the same order.
*
      IDJ = I
      ISTOPJ = ISTOP
      IF( IDJ.LE.IFIN ) THEN
   30    CONTINUE
         IDI = I
         ISTOPI = ISTOP
         IF( IDI.LE.IFIN ) THEN
   40       CONTINUE
*
*           (ROW,COL) are the grid coordinates of the process that
*           owns the piece, counted from the source process
*           (RSRC,CSRC) of the distribution.
*
            ROW = MOD( ( IDI-1 ) / HBL+RSRC, NPROW )
            COL = MOD( ( IDJ-1 ) / HBL+CSRC, NPCOL )
*
*           Local index range IROW1:IROW2, ICOL1:ICOL2 of the piece
*           on its owner.
*
            CALL INFOG1L( IDI, HBL, NPROW, ROW, RSRC, IROW1, ITMP )
            IROW2 = NUMROC( ISTOPI, HBL, ROW, RSRC, NPROW )
            CALL INFOG1L( IDJ, HBL, NPCOL, COL, CSRC, ICOL1, ITMP )
            ICOL2 = NUMROC( ISTOPJ, HBL, COL, CSRC, NPCOL )
*
*           NR-by-NC is the size of the piece, IA the position of its
*           first element in the local array A, and (IB,JB) the
*           position of its first element in B.
*
            NR = IROW2 - IROW1 + 1
            NC = ICOL2 - ICOL1 + 1
            IA = ( ICOL1-1 )*LDA + IROW1
            IB = IDI - I + 1
            JB = IDJ - I + 1
*
            IF( ( MYROW.EQ.ROW ) .AND. ( MYCOL.EQ.COL ) ) THEN
*
*              This process owns the piece.
*
               IF( REV.EQ.0 ) THEN
                  IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN
*
*                    Send the message to everyone
*
                     CALL CGEBS2D( CONTXT, 'All', ' ', NR, NC,
     $                             A( IA ), LDA )
                  ELSE IF( II.EQ.-1 ) THEN
*
*                    Send the message to Column MYCOL which better
*                    be JJ
*
                     CALL CGEBS2D( CONTXT, 'Col', ' ', NR, NC,
     $                             A( IA ), LDA )
                  ELSE IF( JJ.EQ.-1 ) THEN
*
*                    Send the message to Row MYROW which better be II
*
                     CALL CGEBS2D( CONTXT, 'Row', ' ', NR, NC,
     $                             A( IA ), LDA )
                  ELSE IF( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) THEN
*
*                    Send the message to (II,JJ)
*
                     CALL CGESD2D( CONTXT, NR, NC, A( IA ), LDA, II,
     $                             JJ )
                  END IF
*
*                 The owner always fills its own copy of B as well.
*
                  DO 60 JJJ = ICOL1, ICOL2
                     DO 50 III = IROW1, IROW2
                        B( IB+III-IROW1, JB+JJJ-ICOL1 )
     $                     = A( ( JJJ-1 )*LDA+III )
   50                CONTINUE
   60             CONTINUE
               ELSE
*
*                 Receive the piece from (II,JJ) into B unless this
*                 process is (II,JJ) itself or no single source node
*                 was named, then store it into A.
*
                  IF( ( II.NE.-1 ) .AND. ( JJ.NE.-1 ) .AND.
     $                ( ( MYROW.NE.II ) .OR. ( MYCOL.NE.JJ ) ) ) THEN
                     CALL CGERV2D( CONTXT, NR, NC, B( IB, JB ), LDB,
     $                             II, JJ )
                  END IF
                  DO 80 JJJ = ICOL1, ICOL2
                     DO 70 III = IROW1, IROW2
                        A( ( JJJ-1 )*LDA+III )
     $                     = B( IB+III-IROW1, JB+JJJ-ICOL1 )
   70                CONTINUE
   80             CONTINUE
               END IF
            ELSE IF( REV.EQ.0 ) THEN
*
*              Not the owner: receive the piece into B if this
*              process is one of the destinations.
*
               IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.-1 ) ) THEN
                  CALL CGEBR2D( CONTXT, 'All', ' ', NR, NC,
     $                          B( IB, JB ), LDB, ROW, COL )
               ELSE IF( ( II.EQ.-1 ) .AND. ( JJ.EQ.MYCOL ) ) THEN
                  CALL CGEBR2D( CONTXT, 'Col', ' ', NR, NC,
     $                          B( IB, JB ), LDB, ROW, COL )
               ELSE IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.-1 ) ) THEN
                  CALL CGEBR2D( CONTXT, 'Row', ' ', NR, NC,
     $                          B( IB, JB ), LDB, ROW, COL )
               ELSE IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN
                  CALL CGERV2D( CONTXT, NR, NC, B( IB, JB ), LDB,
     $                          ROW, COL )
               END IF
            ELSE IF( ( II.EQ.MYROW ) .AND. ( JJ.EQ.MYCOL ) ) THEN
*
*              Not the owner, REV <> 0: node (II,JJ) sends its copy
*              of the piece to the owner.
*
               CALL CGESD2D( CONTXT, NR, NC, B( IB, JB ), LDB, ROW,
     $                       COL )
            END IF
*
*           Advance to the next piece in this piece column.
*
            IDI = ISTOPI + 1
            ISTOPI = MIN( ISTOPI+HBL, IFIN )
            IF( IDI.LE.IFIN )
     $         GO TO 40
         END IF
*
*        Advance to the next piece column.
*
         IDJ = ISTOPJ + 1
         ISTOPJ = MIN( ISTOPJ+HBL, IFIN )
         IF( IDJ.LE.IFIN )
     $      GO TO 30
      END IF
      RETURN
*
*     End of PCLACP3
*
      END

*  Cross-references from the generated source listing:
*
*  infog1l
*     subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx,
*     rocsrc)
*     Definition infog1l.f:3
*
*  min
*     #define min(A, B)
*     Definition pcgemr.c:181
*
*  pclacp3
*     subroutine pclacp3(m, i, a, desca, b, ldb, ii, jj, rev)
*     Definition pclacp3.f:2