ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdcol2row.f
      SUBROUTINE PDCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
     $                      CSRC, RDEST, CDEST, WORK )
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  Take a block of vectors with M total rows which are distributed over
*  a column of processes, and distribute those rows over a row of
*  processes.  This routine minimizes communication by sending all
*  information it has that a given process in process row RDEST needs
*  at once.  To do this it uses the least common multiple (LCM)
*  concept.  This is simply the realization that if I have part of a
*  vector split over a process column consisting of P processes, and I
*  want to send all of that vector that I own to a new vector
*  distributed over Q processes within a process row, then after I
*  find the process in RDEST that owns the row of the vector I am
*  currently looking at, that process will want every
*  ( LCM(P,Q) / P )-th block of my vector (each block being of size
*  NB x N).
*
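*  As a concrete illustration (not part of the original header): with
*  P = 2 and Q = 3, LCM(2,3) = 6, so for any one destination process a
*  source process sends every ( 6 / 2 ) = 3rd block of NB rows that it
*  owns.
*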
*  Arguments
*  =========
*
*  Rem:  MP, resp. MQ, denotes the number of local rows necessary to
*  ====  store a global vector of dimension M distributed over the P
*        processes of a column, resp. the Q processes of a row.
*
*  ICTXT   (global input) INTEGER
*          The BLACS context handle, indicating the global context of
*          the operation.  The context itself is global.
*
*  M       (global input) INTEGER
*          The number of global rows each vector has.
*
*  N       (global input) INTEGER
*          The number of vectors in the vector block.
*
*  NB      (global input) INTEGER
*          The blocking factor used to divide the rows of the vector
*          amongst the processes of a column.
*
*  VS      (local input) DOUBLE PRECISION
*          Array of dimension (LDVS,N), the block of vectors stored on
*          process column CSRC to be put into memory VD, and stored on
*          process row RDEST.
*
*  LDVS    (local input) INTEGER
*          The leading dimension of VS, LDVS >= MAX( 1, MP ).
*
*  VD      (local output) DOUBLE PRECISION
*          Array of dimension (LDVD,N).  On output, it contains the
*          rows of VS, now stored on process row RDEST.
*
*  LDVD    (local input) INTEGER
*          The leading dimension of VD, LDVD >= MAX( 1, MQ ).
*
*  RSRC    (global input) INTEGER
*          The process row the distributed block of vectors VS begins
*          on.
*
*  CSRC    (global input) INTEGER
*          The process column VS is distributed over.
*
*  RDEST   (global input) INTEGER
*          The process row to distribute VD over.
*
*  CDEST   (global input) INTEGER
*          The process column that VD begins on.
*
*  WORK    (local workspace) DOUBLE PRECISION
*          Array of dimension (LDW), where the required size LDW
*          varies:
*          if( nprow.eq.npcol ) then
*             LDW = 0; WORK not accessed.
*          else
*             lcm = least common multiple of process rows and columns.
*             Mp  = number of rows of VS on my process.
*             nprow = number of process rows.
*             CEIL  = the ceiling of the given operation.
*             LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) )
*          end if
*
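*          Worked example (illustrative, not in the original header):
*          on a 2 x 3 grid with M = 100 and NB = 10, lcm = 6 and
*          Mp = 50, so LDW = 10*N*CEIL( CEIL( 50/10 )/(6/2) )
*                          = 10*N*CEIL( 5/3 ) = 20*N.
*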
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DGESD2D, DGERV2D, DLACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
*     .. Initialize Variables ..
*
      ICPY = 0
*
*     Get grid parameters.
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in the special case NPROW = NPCOL, for which no
*     copying is required
*
      IF( NPROW.NE.NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
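*
*        Illustrative note (not in the original source): with NPROW = 2
*        and NPCOL = 3, LCM = 6, so RBLKSKIP = 2 and CBLKSKIP = 3; a
*        source process packs every 3rd local block for a given
*        destination, and a destination fills every 2nd local block
*        from a given source.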
*
*        If I have part of VS, the source vector(s)
*
         IF( MYCOL.EQ.CSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from RSRC: the process in RDEST the same
*           distance from CDEST will want my first block
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, CBLKSKIP
               JJ = 1
*
*              If I am not the destination process
*
               IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN
*
*                 Pack all data I own that the destination needs
*
                  DO 10 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL DLACPY( 'G', JB, N, VS( II, 1 ), LDVS,
     $                            WORK( JJ ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary (NOTE: will send extra if NB > JB)
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL DGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST,
     $                             ICDEST )
*
               ELSE
*
*                 I am both source and destination: save where to start
*                 copying from for later use.
*
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               ICDEST = MOD( ICDEST+NPROW, NPCOL )
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYROW.EQ.RDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from CDEST: the process in CSRC the same
*           distance from RSRC will have my first block.
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
            DO 50 K = 1, RBLKSKIP
*
*              If I don't already possess the required data
*
               IF( (MYCOL.NE.CSRC).OR.(MYROW.NE.IRSRC) ) THEN
*
*                 Figure how many rows to receive, and receive them
*                 (NOTE: may receive too much -- NB instead of JB)
*
                  NBLOCKS = ( MQ - ISTART + NB ) / NB
                  JJ = ( (NBLOCKS+RBLKSKIP-1) / RBLKSKIP )*NB
                  IF( JJ.GT.0 )
     $               CALL DGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC )
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL DLACPY( 'G', JB, N, WORK( JJ ), JB,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*              If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL DLACPY( 'G', JB, N, VS( JJ, 1 ), LDVS,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*CBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               IRSRC = MOD( IRSRC+NPCOL, NPROW )
   50       CONTINUE
         END IF
*
*     If NPROW = NPCOL, there is a one-to-one correspondence between
*     process rows and columns, so no workspace or copying is required
*
      ELSE
*
         IF( MYCOL.EQ.CSRC ) THEN
*
*           Figure my distance from RSRC: the process in RDEST the same
*           distance from CDEST will want my piece of the vector.
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
            IF( (MYCOL.NE.ICDEST).OR.(MYROW.NE.RDEST) ) THEN
               CALL DGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST )
            ELSE
               CALL DLACPY( 'G', MP, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
*
         IF( MYROW.EQ.RDEST ) THEN
*
*           Figure my distance from CDEST: the process in CSRC the same
*           distance from RSRC will have my piece of the vector.
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
*
            IF( (MYROW.NE.IRSRC).OR.(MYCOL.NE.CSRC) )
     $         CALL DGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC )
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PDCOL2ROW
*
      END
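
A minimal calling sketch follows (illustrative only, not part of the ScaLAPACK sources). It assumes at least 6 processes so that a 2 x 3 grid can be built, and uses hypothetical sizes M = 100, N = 4, NB = 10; WORK is sized by the formula given in the header (LDW = 20*N = 80 for this grid).

      PROGRAM C2RDRV
*     Illustrative driver for PDCOL2ROW; assumes at least 6 processes.
      INTEGER            M, N, NB, LDW
      PARAMETER          ( M = 100, N = 4, NB = 10, LDW = 20*N )
      INTEGER            ICTXT, NPROW, NPCOL, MYROW, MYCOL
      DOUBLE PRECISION   VS( M, N ), VD( M, N ), WORK( LDW )
      EXTERNAL           BLACS_GET, BLACS_GRIDINIT, BLACS_GRIDINFO,
     $                   BLACS_GRIDEXIT, BLACS_EXIT, PDCOL2ROW
*
*     Set up a 2 x 3 process grid.
*
      CALL BLACS_GET( 0, 0, ICTXT )
      CALL BLACS_GRIDINIT( ICTXT, 'R', 2, 3 )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Processes outside the 2 x 3 grid take no part.
*
      IF( MYROW.NE.-1 ) THEN
*
*        ... fill the local rows of VS on process column 0 here ...
*
*        Rows of VS, distributed down process column 0, are
*        redistributed into VD across process row 0.
*
         CALL PDCOL2ROW( ICTXT, M, N, NB, VS, M, VD, M, 0, 0, 0, 0,
     $                   WORK )
         CALL BLACS_GRIDEXIT( ICTXT )
      END IF
      CALL BLACS_EXIT( 0 )
      END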