SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslacp3.f
Go to the documentation of this file.
1 SUBROUTINE pslacp3( M, I, A, DESCA, B, LDB, II, JJ, REV )
2 IMPLICIT NONE
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 25, 2001
8*
9* .. Scalar Arguments ..
10 INTEGER I, II, JJ, LDB, M, REV
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL A( * ), B( LDB, * )
15* ..
16*
17* Purpose
18* =======
19*
20* PSLACP3 is an auxiliary routine that copies from a global parallel
21* array into a local replicated array or vise versa. Notice that
22* the entire submatrix that is copied gets placed on one node or
23* more. The receiving node can be specified precisely, or all nodes
24* can receive, or just one row or column of nodes.
25*
26* Notes
27* =====
28*
29* Each global data object is described by an associated description
30* vector. This vector stores the information required to establish
31* the mapping between an object element and its corresponding process
32* and memory location.
33*
34* Let A be a generic term for any 2D block cyclicly distributed array.
35* Such a global array has an associated description vector DESCA.
36* In the following comments, the character _ should be read as
37* "of the global array".
38*
39* NOTATION STORED IN EXPLANATION
40* --------------- -------------- --------------------------------------
41* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42* DTYPE_A = 1.
43* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
44* the BLACS process grid A is distribu-
45* ted over. The context itself is glo-
46* bal, but the handle (the integer
47* value) may vary.
48* M_A (global) DESCA( M_ ) The number of rows in the global
49* array A.
50* N_A (global) DESCA( N_ ) The number of columns in the global
51* array A.
52* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
53* the rows of the array.
54* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
55* the columns of the array.
56* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
57* row of the array A is distributed.
58* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59* first column of the array A is
60* distributed.
61* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62* array. LLD_A >= MAX(1,LOCr(M_A)).
63*
64* Let K be the number of rows or columns of a distributed matrix,
65* and assume that its process grid has dimension p x q.
66* LOCr( K ) denotes the number of elements of K that a process
67* would receive if K were distributed over the p processes of its
68* process column.
69* Similarly, LOCc( K ) denotes the number of elements of K that a
70* process would receive if K were distributed over the q processes of
71* its process row.
72* The values of LOCr() and LOCc() may be determined via a call to the
73* ScaLAPACK tool function, NUMROC:
74* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76* An upper bound for these quantities may be computed by:
77* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79*
80* Arguments
81* =========
82*
83* M (global input) INTEGER
84* M is the order of the square submatrix that is copied.
85* M >= 0.
86* Unchanged on exit
87*
88* I (global input) INTEGER
89* A(I,I) is the global location that the copying starts from.
90* Unchanged on exit.
91*
92* A (global input/output) REAL array, dimension
93* (DESCA(LLD_),*)
94* On entry, the parallel matrix to be copied into or from.
95* On exit, if REV=1, the copied data.
96* Unchanged on exit if REV=0.
97*
98* DESCA (global and local input) INTEGER array of dimension DLEN_.
99* The array descriptor for the distributed matrix A.
100*
101* B (local input/output) REAL array of size (LDB,M)
102* If REV=0, this is the global portion of the array
103* A(I:I+M-1,I:I+M-1).
104* If REV=1, this is the unchanged on exit.
105*
106* LDB (local input) INTEGER
107* The leading dimension of B.
108*
109* II (global input) INTEGER
110* By using REV 0 & 1, data can be sent out and returned again.
111* If REV=0, then II is destination row index for the node(s)
112* receiving the replicated B.
113* If II>=0,JJ>=0, then node (II,JJ) receives the data
114* If II=-1,JJ>=0, then all rows in column JJ receive the
115* data
116* If II>=0,JJ=-1, then all cols in row II receive the data
117* If II=-1,JJ=-1, then all nodes receive the data
118* If REV<>0, then II is the source row index for the node(s)
119* sending the replicated B.
120*
121* JJ (global input) INTEGER
122* Similar description as II above
123*
124* REV (global input) INTEGER
125* Use REV = 0 to send global A into locally replicated B
126* (on node (II,JJ)).
127* Use REV <> 0 to send locally replicated B from node (II,JJ)
128* to its owner (which changes depending on its location in
129* A) into the global A.
130*
131* Implemented by: G. Henry, May 1, 1997
132*
133* =====================================================================
134*
135* .. Parameters ..
136 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
137 $ LLD_, MB_, M_, NB_, N_, RSRC_
138 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
139 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
140 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
141 REAL ZERO
142 parameter( zero = 0.0 )
143* ..
144* .. Local Scalars ..
145 INTEGER COL, CONTXT, HBL, IAFIRST, ICOL1, ICOL2, IDI,
146 $ IDJ, IFIN, III, IROW1, IROW2, ISTOP, ISTOPI,
147 $ ISTOPJ, ITMP, JAFIRST, JJJ, LDA, MYCOL, MYROW,
148 $ NPCOL, NPROW, ROW
149* ..
150* .. External Functions ..
151 INTEGER NUMROC
152 EXTERNAL numroc
153* ..
154* .. External Subroutines ..
155 EXTERNAL blacs_gridinfo, sgebr2d, sgebs2d, sgerv2d,
156 $ sgesd2d, infog1l
157* ..
158* .. Intrinsic Functions ..
159 INTRINSIC min, mod
160* ..
161* .. Executable Statements ..
162*
163 IF( m.LE.0 )
164 $ RETURN
165*
166 hbl = desca( mb_ )
167 contxt = desca( ctxt_ )
168 lda = desca( lld_ )
169 iafirst = desca( rsrc_ )
170 jafirst = desca( csrc_ )
171*
172 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
173*
174 IF( rev.EQ.0 ) THEN
175 DO 20 idi = 1, m
176 DO 10 idj = 1, m
177 b( idi, idj ) = zero
178 10 CONTINUE
179 20 CONTINUE
180 END IF
181*
182 ifin = i + m - 1
183*
184 IF( mod( i+hbl, hbl ).NE.0 ) THEN
185 istop = min( i+hbl-mod( i+hbl, hbl ), ifin )
186 ELSE
187 istop = i
188 END IF
189 idj = i
190 istopj = istop
191 IF( idj.LE.ifin ) THEN
192 30 CONTINUE
193 idi = i
194 istopi = istop
195 IF( idi.LE.ifin ) THEN
196 40 CONTINUE
197 row = mod( ( idi-1 ) / hbl + iafirst, nprow )
198 col = mod( ( idj-1 ) / hbl + jafirst, npcol )
199 CALL infog1l( idi, hbl, nprow, row, iafirst, irow1, itmp )
200 irow2 = numroc( istopi, hbl, row, iafirst, nprow )
201 CALL infog1l( idj, hbl, npcol, col, jafirst, icol1, itmp )
202 icol2 = numroc( istopj, hbl, col, jafirst, npcol )
203 IF( ( myrow.EQ.row ) .AND. ( mycol.EQ.col ) ) THEN
204 IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
205*
206* Send the message to everyone
207*
208 IF( rev.EQ.0 ) THEN
209 CALL sgebs2d( contxt, 'All', ' ', irow2-irow1+1,
210 $ icol2-icol1+1, a( ( icol1-1 )*lda+
211 $ irow1 ), lda )
212 END IF
213 END IF
214 IF( ( ii.EQ.-1 ) .AND. ( jj.NE.-1 ) ) THEN
215*
216* Send the message to Column MYCOL which better be JJ
217*
218 IF( rev.EQ.0 ) THEN
219 CALL sgebs2d( contxt, 'Col', ' ', irow2-irow1+1,
220 $ icol2-icol1+1, a( ( icol1-1 )*lda+
221 $ irow1 ), lda )
222 END IF
223 END IF
224 IF( ( ii.NE.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
225*
226* Send the message to Row MYROW which better be II
227*
228 IF( rev.EQ.0 ) THEN
229 CALL sgebs2d( contxt, 'Row', ' ', irow2-irow1+1,
230 $ icol2-icol1+1, a( ( icol1-1 )*lda+
231 $ irow1 ), lda )
232 END IF
233 END IF
234 IF( ( ii.NE.-1 ) .AND. ( jj.NE.-1 ) .AND.
235 $ ( ( myrow.NE.ii ) .OR. ( mycol.NE.jj ) ) ) THEN
236*
237* Recv/Send the message to (II,JJ)
238*
239 IF( rev.EQ.0 ) THEN
240 CALL sgesd2d( contxt, irow2-irow1+1, icol2-icol1+1,
241 $ a( ( icol1-1 )*lda+irow1 ), lda, ii,
242 $ jj )
243 ELSE
244 CALL sgerv2d( contxt, irow2-irow1+1, icol2-icol1+1,
245 $ b( idi-i+1, idj-i+1 ), ldb, ii, jj )
246 END IF
247 END IF
248 IF( rev.EQ.0 ) THEN
249 DO 60 jjj = icol1, icol2
250 DO 50 iii = irow1, irow2
251 b( idi+iii-irow1+1-i, idj+jjj-icol1+1-i )
252 $ = a( ( jjj-1 )*lda+iii )
253 50 CONTINUE
254 60 CONTINUE
255 ELSE
256 DO 80 jjj = icol1, icol2
257 DO 70 iii = irow1, irow2
258 a( ( jjj-1 )*lda+iii ) = b( idi+iii-irow1+1-i,
259 $ idj+jjj-icol1+1-i )
260 70 CONTINUE
261 80 CONTINUE
262 END IF
263 ELSE
264 IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
265 IF( rev.EQ.0 ) THEN
266 CALL sgebr2d( contxt, 'All', ' ', irow2-irow1+1,
267 $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
268 $ ldb, row, col )
269 END IF
270 END IF
271 IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.mycol ) ) THEN
272 IF( rev.EQ.0 ) THEN
273 CALL sgebr2d( contxt, 'Col', ' ', irow2-irow1+1,
274 $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
275 $ ldb, row, col )
276 END IF
277 END IF
278 IF( ( ii.EQ.myrow ) .AND. ( jj.EQ.-1 ) ) THEN
279 IF( rev.EQ.0 ) THEN
280 CALL sgebr2d( contxt, 'Row', ' ', irow2-irow1+1,
281 $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
282 $ ldb, row, col )
283 END IF
284 END IF
285 IF( ( ii.EQ.myrow ) .AND. ( jj.EQ.mycol ) ) THEN
286 IF( rev.EQ.0 ) THEN
287 CALL sgerv2d( contxt, irow2-irow1+1, icol2-icol1+1,
288 $ b( idi-i+1, idj-i+1 ), ldb, row,
289 $ col )
290 ELSE
291 CALL sgesd2d( contxt, irow2-irow1+1, icol2-icol1+1,
292 $ b( idi-i+1, idj-i+1 ), ldb, row,
293 $ col )
294* CALL SGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
295* $ A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL)
296 END IF
297 END IF
298 END IF
299 idi = istopi + 1
300 istopi = min( istopi+hbl, ifin )
301 IF( idi.LE.ifin )
302 $ GO TO 40
303 END IF
304 idj = istopj + 1
305 istopj = min( istopj+hbl, ifin )
306 IF( idj.LE.ifin )
307 $ GO TO 30
308 END IF
309 RETURN
310*
311* End of PSLACP3
312*
313 END
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
#define min(A, B)
Definition pcgemr.c:181
subroutine pslacp3(m, i, a, desca, b, ldb, ii, jj, rev)
Definition pslacp3.f:2