ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclacp3.f
Go to the documentation of this file.
1  SUBROUTINE pclacp3( M, I, A, DESCA, B, LDB, II, JJ, REV )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * July 31, 2001
7 *
8 * .. Scalar Arguments ..
9  INTEGER I, II, JJ, LDB, M, REV
10 * ..
11 * .. Array Arguments ..
12  INTEGER DESCA( * )
13  COMPLEX A( * ), B( LDB, * )
14 * ..
15 *
16 * Purpose
17 * =======
18 *
19 * PCLACP3 is an auxiliary routine that copies from a global parallel
20 * array into a local replicated array or vise versa. Notice that
21 * the entire submatrix that is copied gets placed on one node or
22 * more. The receiving node can be specified precisely, or all nodes
23 * can receive, or just one row or column of nodes.
24 *
25 * Notes
26 * =====
27 *
28 * Each global data object is described by an associated description
29 * vector. This vector stores the information required to establish
30 * the mapping between an object element and its corresponding process
31 * and memory location.
32 *
33 * Let A be a generic term for any 2D block cyclicly distributed array.
34 * Such a global array has an associated description vector DESCA.
35 * In the following comments, the character _ should be read as
36 * "of the global array".
37 *
38 * NOTATION STORED IN EXPLANATION
39 * --------------- -------------- --------------------------------------
40 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
41 * DTYPE_A = 1.
42 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
43 * the BLACS process grid A is distribu-
44 * ted over. The context itself is glo-
45 * bal, but the handle (the integer
46 * value) may vary.
47 * M_A (global) DESCA( M_ ) The number of rows in the global
48 * array A.
49 * N_A (global) DESCA( N_ ) The number of columns in the global
50 * array A.
51 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
52 * the rows of the array.
53 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
54 * the columns of the array.
55 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
56 * row of the array A is distributed.
57 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
58 * first column of the array A is
59 * distributed.
60 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
61 * array. LLD_A >= MAX(1,LOCr(M_A)).
62 *
63 * Let K be the number of rows or columns of a distributed matrix,
64 * and assume that its process grid has dimension p x q.
65 * LOCr( K ) denotes the number of elements of K that a process
66 * would receive if K were distributed over the p processes of its
67 * process column.
68 * Similarly, LOCc( K ) denotes the number of elements of K that a
69 * process would receive if K were distributed over the q processes of
70 * its process row.
71 * The values of LOCr() and LOCc() may be determined via a call to the
72 * ScaLAPACK tool function, NUMROC:
73 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
74 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
75 * An upper bound for these quantities may be computed by:
76 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
77 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
78 *
79 * Arguments
80 * =========
81 *
82 * M (global input) INTEGER
83 * M is the order of the square submatrix that is copied.
84 * M >= 0.
85 * Unchanged on exit
86 *
87 * I (global input) INTEGER
88 * A(I,I) is the global location that the copying starts from.
89 * Unchanged on exit.
90 *
91 * A (global input/output) COMPLEX array, dimension
92 * (DESCA(LLD_),*)
93 * On entry, the parallel matrix to be copied into or from.
94 * On exit, if REV=1, the copied data.
95 * Unchanged on exit if REV=0.
96 *
97 * DESCA (global and local input) INTEGER array of dimension DLEN_.
98 * The array descriptor for the distributed matrix A.
99 *
100 * B (local input/output) COMPLEX array of size (LDB,M)
101 * If REV=0, this is the global portion of the array
102 * A(I:I+M-1,I:I+M-1).
103 * If REV=1, this is the unchanged on exit.
104 *
105 * LDB (local input) INTEGER
106 * The leading dimension of B.
107 *
108 * II (global input) INTEGER
109 * By using REV 0 & 1, data can be sent out and returned again.
110 * If REV=0, then II is destination row index for the node(s)
111 * receiving the replicated B.
112 * If II>=0,JJ>=0, then node (II,JJ) receives the data
113 * If II=-1,JJ>=0, then all rows in column JJ receive the
114 * data
115 * If II>=0,JJ=-1, then all cols in row II receive the data
116 * If II=-1,JJ=-1, then all nodes receive the data
117 * If REV<>0, then II is the source row index for the node(s)
118 * sending the replicated B.
119 *
120 * JJ (global input) INTEGER
121 * Similar description as II above
122 *
123 * REV (global input) INTEGER
124 * Use REV = 0 to send global A into locally replicated B
125 * (on node (II,JJ)).
126 * Use REV <> 0 to send locally replicated B from node (II,JJ)
127 * to its owner (which changes depending on its location in
128 * A) into the global A.
129 *
130 * Further Details
131 * ===============
132 *
133 * Implemented by: M. Fahey, May 28, 1999
134 *
135 * =====================================================================
136 *
137 * .. Parameters ..
138  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
139  $ LLD_, MB_, M_, NB_, N_, RSRC_
140  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
141  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
142  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
143  COMPLEX ZERO
144  parameter( zero = ( 0.0e+0, 0.0e+0 ) )
145 * ..
146 * .. Local Scalars ..
147  INTEGER COL, CONTXT, HBL, ICOL1, ICOL2, IDI, IDJ, IFIN,
148  $ III, IROW1, IROW2, ISTOP, ISTOPI, ISTOPJ, ITMP,
149  $ JJJ, LDA, MYCOL, MYROW, NPCOL, NPROW, ROW
150 * ..
151 * .. External Functions ..
152  INTEGER NUMROC
153  EXTERNAL numroc
154 * ..
155 * .. External Subroutines ..
156  EXTERNAL blacs_gridinfo, infog1l, cgebr2d, cgebs2d,
157  $ cgerv2d, cgesd2d
158 * ..
159 * .. Intrinsic Functions ..
160  INTRINSIC min, mod
161 * ..
162 * .. Executable Statements ..
163 *
164  IF( m.LE.0 )
165  $ RETURN
166 *
167  hbl = desca( mb_ )
168  contxt = desca( ctxt_ )
169  lda = desca( lld_ )
170 *
171  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
172 *
173  IF( rev.EQ.0 ) THEN
174  DO 20 idi = 1, m
175  DO 10 idj = 1, m
176  b( idi, idj ) = zero
177  10 CONTINUE
178  20 CONTINUE
179  END IF
180 *
181  ifin = i + m - 1
182 *
183  IF( mod( i+hbl, hbl ).NE.0 ) THEN
184  istop = min( i+hbl-mod( i+hbl, hbl ), ifin )
185  ELSE
186  istop = i
187  END IF
188  idj = i
189  istopj = istop
190  IF( idj.LE.ifin ) THEN
191  30 CONTINUE
192  idi = i
193  istopi = istop
194  IF( idi.LE.ifin ) THEN
195  40 CONTINUE
196  row = mod( ( idi-1 ) / hbl, nprow )
197  col = mod( ( idj-1 ) / hbl, npcol )
198  CALL infog1l( idi, hbl, nprow, row, 0, irow1, itmp )
199  irow2 = numroc( istopi, hbl, row, 0, nprow )
200  CALL infog1l( idj, hbl, npcol, col, 0, icol1, itmp )
201  icol2 = numroc( istopj, hbl, col, 0, npcol )
202  IF( ( myrow.EQ.row ) .AND. ( mycol.EQ.col ) ) THEN
203  IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
204 *
205 * Send the message to everyone
206 *
207  IF( rev.EQ.0 ) THEN
208  CALL cgebs2d( contxt, 'All', ' ', irow2-irow1+1,
209  $ icol2-icol1+1, a( ( icol1-1 )*lda+
210  $ irow1 ), lda )
211  END IF
212  END IF
213  IF( ( ii.EQ.-1 ) .AND. ( jj.NE.-1 ) ) THEN
214 *
215 * Send the message to Column MYCOL which better be JJ
216 *
217  IF( rev.EQ.0 ) THEN
218  CALL cgebs2d( contxt, 'Col', ' ', irow2-irow1+1,
219  $ icol2-icol1+1, a( ( icol1-1 )*lda+
220  $ irow1 ), lda )
221  END IF
222  END IF
223  IF( ( ii.NE.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
224 *
225 * Send the message to Row MYROW which better be II
226 *
227  IF( rev.EQ.0 ) THEN
228  CALL cgebs2d( contxt, 'Row', ' ', irow2-irow1+1,
229  $ icol2-icol1+1, a( ( icol1-1 )*lda+
230  $ irow1 ), lda )
231  END IF
232  END IF
233  IF( ( ii.NE.-1 ) .AND. ( jj.NE.-1 ) .AND.
234  $ ( ( myrow.NE.ii ) .OR. ( mycol.NE.jj ) ) ) THEN
235 *
236 * Recv/Send the message to (II,JJ)
237 *
238  IF( rev.EQ.0 ) THEN
239  CALL cgesd2d( contxt, irow2-irow1+1, icol2-icol1+1,
240  $ a( ( icol1-1 )*lda+irow1 ), lda, ii,
241  $ jj )
242  ELSE
243  CALL cgerv2d( contxt, irow2-irow1+1, icol2-icol1+1,
244  $ b( idi-i+1, idj-i+1 ), ldb, ii, jj )
245  END IF
246  END IF
247  IF( rev.EQ.0 ) THEN
248  DO 60 jjj = icol1, icol2
249  DO 50 iii = irow1, irow2
250  b( idi+iii-irow1+1-i, idj+jjj-icol1+1-i )
251  $ = a( ( jjj-1 )*lda+iii )
252  50 CONTINUE
253  60 CONTINUE
254  ELSE
255  DO 80 jjj = icol1, icol2
256  DO 70 iii = irow1, irow2
257  a( ( jjj-1 )*lda+iii ) = b( idi+iii-irow1+1-i,
258  $ idj+jjj-icol1+1-i )
259  70 CONTINUE
260  80 CONTINUE
261  END IF
262  ELSE
263  IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
264  IF( rev.EQ.0 ) THEN
265  CALL cgebr2d( contxt, 'All', ' ', irow2-irow1+1,
266  $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
267  $ ldb, row, col )
268  END IF
269  END IF
270  IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.mycol ) ) THEN
271  IF( rev.EQ.0 ) THEN
272  CALL cgebr2d( contxt, 'Col', ' ', irow2-irow1+1,
273  $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
274  $ ldb, row, col )
275  END IF
276  END IF
277  IF( ( ii.EQ.myrow ) .AND. ( jj.EQ.-1 ) ) THEN
278  IF( rev.EQ.0 ) THEN
279  CALL cgebr2d( contxt, 'Row', ' ', irow2-irow1+1,
280  $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
281  $ ldb, row, col )
282  END IF
283  END IF
284  IF( ( ii.EQ.myrow ) .AND. ( jj.EQ.mycol ) ) THEN
285  IF( rev.EQ.0 ) THEN
286  CALL cgerv2d( contxt, irow2-irow1+1, icol2-icol1+1,
287  $ b( idi-i+1, idj-i+1 ), ldb, row,
288  $ col )
289  ELSE
290  CALL cgesd2d( contxt, irow2-irow1+1, icol2-icol1+1,
291  $ b( idi-i+1, idj-i+1 ), ldb, row,
292  $ col )
293 * CALL CGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
294 * $ A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL)
295  END IF
296  END IF
297  END IF
298  idi = istopi + 1
299  istopi = min( istopi+hbl, ifin )
300  IF( idi.LE.ifin )
301  $ GO TO 40
302  END IF
303  idj = istopj + 1
304  istopj = min( istopj+hbl, ifin )
305  IF( idj.LE.ifin )
306  $ GO TO 30
307  END IF
308  RETURN
309 *
310 * End of PCLACP3
311 *
312  END
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
pclacp3
subroutine pclacp3(M, I, A, DESCA, B, LDB, II, JJ, REV)
Definition: pclacp3.f:2
min
#define min(A, B)
Definition: pcgemr.c:181