ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslacp3.f
Go to the documentation of this file.
1  SUBROUTINE pslacp3( M, I, A, DESCA, B, LDB, II, JJ, REV )
2  IMPLICIT NONE
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 25, 2001
8 *
9 * .. Scalar Arguments ..
10  INTEGER I, II, JJ, LDB, M, REV
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  REAL A( * ), B( LDB, * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PSLACP3 is an auxiliary routine that copies from a global parallel
21 * array into a local replicated array or vise versa. Notice that
22 * the entire submatrix that is copied gets placed on one node or
23 * more. The receiving node can be specified precisely, or all nodes
24 * can receive, or just one row or column of nodes.
25 *
26 * Notes
27 * =====
28 *
29 * Each global data object is described by an associated description
30 * vector. This vector stores the information required to establish
31 * the mapping between an object element and its corresponding process
32 * and memory location.
33 *
34 * Let A be a generic term for any 2D block cyclicly distributed array.
35 * Such a global array has an associated description vector DESCA.
36 * In the following comments, the character _ should be read as
37 * "of the global array".
38 *
39 * NOTATION STORED IN EXPLANATION
40 * --------------- -------------- --------------------------------------
41 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42 * DTYPE_A = 1.
43 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
44 * the BLACS process grid A is distribu-
45 * ted over. The context itself is glo-
46 * bal, but the handle (the integer
47 * value) may vary.
48 * M_A (global) DESCA( M_ ) The number of rows in the global
49 * array A.
50 * N_A (global) DESCA( N_ ) The number of columns in the global
51 * array A.
52 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
53 * the rows of the array.
54 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
55 * the columns of the array.
56 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
57 * row of the array A is distributed.
58 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59 * first column of the array A is
60 * distributed.
61 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62 * array. LLD_A >= MAX(1,LOCr(M_A)).
63 *
64 * Let K be the number of rows or columns of a distributed matrix,
65 * and assume that its process grid has dimension p x q.
66 * LOCr( K ) denotes the number of elements of K that a process
67 * would receive if K were distributed over the p processes of its
68 * process column.
69 * Similarly, LOCc( K ) denotes the number of elements of K that a
70 * process would receive if K were distributed over the q processes of
71 * its process row.
72 * The values of LOCr() and LOCc() may be determined via a call to the
73 * ScaLAPACK tool function, NUMROC:
74 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76 * An upper bound for these quantities may be computed by:
77 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79 *
80 * Arguments
81 * =========
82 *
83 * M (global input) INTEGER
84 * M is the order of the square submatrix that is copied.
85 * M >= 0.
86 * Unchanged on exit
87 *
88 * I (global input) INTEGER
89 * A(I,I) is the global location that the copying starts from.
90 * Unchanged on exit.
91 *
92 * A (global input/output) REAL array, dimension
93 * (DESCA(LLD_),*)
94 * On entry, the parallel matrix to be copied into or from.
95 * On exit, if REV=1, the copied data.
96 * Unchanged on exit if REV=0.
97 *
98 * DESCA (global and local input) INTEGER array of dimension DLEN_.
99 * The array descriptor for the distributed matrix A.
100 *
101 * B (local input/output) REAL array of size (LDB,M)
102 * If REV=0, this is the global portion of the array
103 * A(I:I+M-1,I:I+M-1).
104 * If REV=1, this is the unchanged on exit.
105 *
106 * LDB (local input) INTEGER
107 * The leading dimension of B.
108 *
109 * II (global input) INTEGER
110 * By using REV 0 & 1, data can be sent out and returned again.
111 * If REV=0, then II is destination row index for the node(s)
112 * receiving the replicated B.
113 * If II>=0,JJ>=0, then node (II,JJ) receives the data
114 * If II=-1,JJ>=0, then all rows in column JJ receive the
115 * data
116 * If II>=0,JJ=-1, then all cols in row II receive the data
117 * If II=-1,JJ=-1, then all nodes receive the data
118 * If REV<>0, then II is the source row index for the node(s)
119 * sending the replicated B.
120 *
121 * JJ (global input) INTEGER
122 * Similar description as II above
123 *
124 * REV (global input) INTEGER
125 * Use REV = 0 to send global A into locally replicated B
126 * (on node (II,JJ)).
127 * Use REV <> 0 to send locally replicated B from node (II,JJ)
128 * to its owner (which changes depending on its location in
129 * A) into the global A.
130 *
131 * Implemented by: G. Henry, May 1, 1997
132 *
133 * =====================================================================
134 *
135 * .. Parameters ..
136  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
137  $ LLD_, MB_, M_, NB_, N_, RSRC_
138  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
139  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
140  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
141  REAL ZERO
142  parameter( zero = 0.0 )
143 * ..
144 * .. Local Scalars ..
145  INTEGER COL, CONTXT, HBL, IAFIRST, ICOL1, ICOL2, IDI,
146  $ IDJ, IFIN, III, IROW1, IROW2, ISTOP, ISTOPI,
147  $ ISTOPJ, ITMP, JAFIRST, JJJ, LDA, MYCOL, MYROW,
148  $ NPCOL, NPROW, ROW
149 * ..
150 * .. External Functions ..
151  INTEGER NUMROC
152  EXTERNAL numroc
153 * ..
154 * .. External Subroutines ..
155  EXTERNAL blacs_gridinfo, sgebr2d, sgebs2d, sgerv2d,
156  $ sgesd2d, infog1l
157 * ..
158 * .. Intrinsic Functions ..
159  INTRINSIC min, mod
160 * ..
161 * .. Executable Statements ..
162 *
163  IF( m.LE.0 )
164  $ RETURN
165 *
166  hbl = desca( mb_ )
167  contxt = desca( ctxt_ )
168  lda = desca( lld_ )
169  iafirst = desca( rsrc_ )
170  jafirst = desca( csrc_ )
171 *
172  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
173 *
174  IF( rev.EQ.0 ) THEN
175  DO 20 idi = 1, m
176  DO 10 idj = 1, m
177  b( idi, idj ) = zero
178  10 CONTINUE
179  20 CONTINUE
180  END IF
181 *
182  ifin = i + m - 1
183 *
184  IF( mod( i+hbl, hbl ).NE.0 ) THEN
185  istop = min( i+hbl-mod( i+hbl, hbl ), ifin )
186  ELSE
187  istop = i
188  END IF
189  idj = i
190  istopj = istop
191  IF( idj.LE.ifin ) THEN
192  30 CONTINUE
193  idi = i
194  istopi = istop
195  IF( idi.LE.ifin ) THEN
196  40 CONTINUE
197  row = mod( ( idi-1 ) / hbl + iafirst, nprow )
198  col = mod( ( idj-1 ) / hbl + jafirst, npcol )
199  CALL infog1l( idi, hbl, nprow, row, iafirst, irow1, itmp )
200  irow2 = numroc( istopi, hbl, row, iafirst, nprow )
201  CALL infog1l( idj, hbl, npcol, col, jafirst, icol1, itmp )
202  icol2 = numroc( istopj, hbl, col, jafirst, npcol )
203  IF( ( myrow.EQ.row ) .AND. ( mycol.EQ.col ) ) THEN
204  IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
205 *
206 * Send the message to everyone
207 *
208  IF( rev.EQ.0 ) THEN
209  CALL sgebs2d( contxt, 'All', ' ', irow2-irow1+1,
210  $ icol2-icol1+1, a( ( icol1-1 )*lda+
211  $ irow1 ), lda )
212  END IF
213  END IF
214  IF( ( ii.EQ.-1 ) .AND. ( jj.NE.-1 ) ) THEN
215 *
216 * Send the message to Column MYCOL which better be JJ
217 *
218  IF( rev.EQ.0 ) THEN
219  CALL sgebs2d( contxt, 'Col', ' ', irow2-irow1+1,
220  $ icol2-icol1+1, a( ( icol1-1 )*lda+
221  $ irow1 ), lda )
222  END IF
223  END IF
224  IF( ( ii.NE.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
225 *
226 * Send the message to Row MYROW which better be II
227 *
228  IF( rev.EQ.0 ) THEN
229  CALL sgebs2d( contxt, 'Row', ' ', irow2-irow1+1,
230  $ icol2-icol1+1, a( ( icol1-1 )*lda+
231  $ irow1 ), lda )
232  END IF
233  END IF
234  IF( ( ii.NE.-1 ) .AND. ( jj.NE.-1 ) .AND.
235  $ ( ( myrow.NE.ii ) .OR. ( mycol.NE.jj ) ) ) THEN
236 *
237 * Recv/Send the message to (II,JJ)
238 *
239  IF( rev.EQ.0 ) THEN
240  CALL sgesd2d( contxt, irow2-irow1+1, icol2-icol1+1,
241  $ a( ( icol1-1 )*lda+irow1 ), lda, ii,
242  $ jj )
243  ELSE
244  CALL sgerv2d( contxt, irow2-irow1+1, icol2-icol1+1,
245  $ b( idi-i+1, idj-i+1 ), ldb, ii, jj )
246  END IF
247  END IF
248  IF( rev.EQ.0 ) THEN
249  DO 60 jjj = icol1, icol2
250  DO 50 iii = irow1, irow2
251  b( idi+iii-irow1+1-i, idj+jjj-icol1+1-i )
252  $ = a( ( jjj-1 )*lda+iii )
253  50 CONTINUE
254  60 CONTINUE
255  ELSE
256  DO 80 jjj = icol1, icol2
257  DO 70 iii = irow1, irow2
258  a( ( jjj-1 )*lda+iii ) = b( idi+iii-irow1+1-i,
259  $ idj+jjj-icol1+1-i )
260  70 CONTINUE
261  80 CONTINUE
262  END IF
263  ELSE
264  IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.-1 ) ) THEN
265  IF( rev.EQ.0 ) THEN
266  CALL sgebr2d( contxt, 'All', ' ', irow2-irow1+1,
267  $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
268  $ ldb, row, col )
269  END IF
270  END IF
271  IF( ( ii.EQ.-1 ) .AND. ( jj.EQ.mycol ) ) THEN
272  IF( rev.EQ.0 ) THEN
273  CALL sgebr2d( contxt, 'Col', ' ', irow2-irow1+1,
274  $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
275  $ ldb, row, col )
276  END IF
277  END IF
278  IF( ( ii.EQ.myrow ) .AND. ( jj.EQ.-1 ) ) THEN
279  IF( rev.EQ.0 ) THEN
280  CALL sgebr2d( contxt, 'Row', ' ', irow2-irow1+1,
281  $ icol2-icol1+1, b( idi-i+1, idj-i+1 ),
282  $ ldb, row, col )
283  END IF
284  END IF
285  IF( ( ii.EQ.myrow ) .AND. ( jj.EQ.mycol ) ) THEN
286  IF( rev.EQ.0 ) THEN
287  CALL sgerv2d( contxt, irow2-irow1+1, icol2-icol1+1,
288  $ b( idi-i+1, idj-i+1 ), ldb, row,
289  $ col )
290  ELSE
291  CALL sgesd2d( contxt, irow2-irow1+1, icol2-icol1+1,
292  $ b( idi-i+1, idj-i+1 ), ldb, row,
293  $ col )
294 * CALL SGESD2D(CONTXT, IROW2-IROW1+1, ICOL2-ICOL1+1,
295 * $ A((ICOL1-1)*LDA+IROW1),LDA, ROW, COL)
296  END IF
297  END IF
298  END IF
299  idi = istopi + 1
300  istopi = min( istopi+hbl, ifin )
301  IF( idi.LE.ifin )
302  $ GO TO 40
303  END IF
304  idj = istopj + 1
305  istopj = min( istopj+hbl, ifin )
306  IF( idj.LE.ifin )
307  $ GO TO 30
308  END IF
309  RETURN
310 *
311 * End of PSLACP3
312 *
313  END
pslacp3
subroutine pslacp3(M, I, A, DESCA, B, LDB, II, JJ, REV)
Definition: pslacp3.f:2
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
min
#define min(A, B)
Definition: pcgemr.c:181