ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pccol2row.f
Go to the documentation of this file.
1  SUBROUTINE pccol2row( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
2  $ CSRC, RDEST, CDEST, WORK)
3 *
4 * -- ScaLAPACK tools routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
11  $ rdest, rsrc
12 * ..
13 * .. Array Arguments ..
14  COMPLEX VD( LDVD, * ), VS( LDVS, * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * Take a block of vectors with M total rows which are distributed over
21 * a column of processes, and distribute those rows over a row of
22 * processes. This routine minimizes communication by sending all
23 * information it has that a given process in the RDEST needs at once.
24 * To do this it uses the least common multiple (LCM) concept. This is
25 * simply the realization that if I have part of a vector split over a
26 * process column consisting of P processes, and I want to send all of
27 * that vector that I own to a new vector distributed over Q processes
28 * within a process row, that after I find the process in RDEST that
29 * owns the row of the vector I'm currently looking at, he will want
30 * every ( (LCM(P,Q) / P ) block of my vector (the block being of size
31 * NB x N).
32 *
33 * Arguments
34 * =========
35 *
36 * Rem: MP, resp. NQ, denotes the number of local rows, resp. local
37 * ==== columns, necessary to store a global vector of dimension M
38 * across P processes, resp. N over Q processes.
39 *
40 * ICTXT (global input) INTEGER
41 * The BLACS context handle, indicating the global context of
42 * the operation. The context itself is global.
43 *
44 * M (global input) INTEGER
45 * The number of global rows each vector has.
46 *
47 * N (global input) INTEGER
48 * The number of vectors in the vector block.
49 *
50 * NB (global input) INTEGER
51 * The blocking factor used to divide the rows of the vector
52 * amongst the processes of a column.
53 *
54 * VS (local input) COMPLEX
55 * Array of dimension (LDVS,N), the block of vectors stored on
56 * process column CSRC to be put into memory VD, and stored
57 * on process row RDEST.
58 *
59 * LDVS (local input) INTEGER
60 * The leading dimension of VS, LDVS >= MAX( 1, MP ).
61 *
62 * VD (local output) COMPLEX
63 * Array of dimension (LDVD,N), on output, the contents of VS
64 * stored on process row RDEST will be here.
65 *
66 * LDVD (local input) INTEGER
67 * The leading dimension of VD, LDVD >= MAX( 1, MQ ).
68 *
69 * RSRC (global input) INTEGER
70 * The process row the distributed block of vectors VS begins
71 * on.
72 *
73 * CSRC (global input) INTEGER
74 * The process column VS is distributed over.
75 *
76 * RDEST (global input) INTEGER
77 * The process row to distribute VD over.
78 *
79 * CDEST (global input) INTEGER
80 * The process column that VD begins on.
81 *
82 * WORK (local workspace) COMPLEX
83 * Array of dimension (LDW), the required size of work varies:
84 * if( nprow.eq.npcol ) then
85 * LDW = 0; WORK not accessed.
86 * else
87 * lcm = least common multiple of process rows and columns.
88 * Mp = number of rows of VS on my process.
89 * nprow = number of process rows
90 * CEIL = the ceiling of given operation
91 * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) )
92 * end if
93 *
94 * =====================================================================
95 *
96 * .. Local Scalars ..
97  INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
98  $ jj, k, lcm, mp, mq, mycol, mydist, myrow,
99  $ nblocks, npcol, nprow, rblkskip
100 * ..
101 * .. External Subroutines ..
102  EXTERNAL blacs_gridinfo, cgesd2d, cgerv2d, clacpy
103 * ..
104 * .. External Functions ..
105  INTEGER ILCM, NUMROC
106  EXTERNAL ilcm, numroc
107 * ..
108 * .. Executable Statements ..
109 *
110  icpy = 0
111 *
112 * Get grid parameters.
113 *
114  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
115 *
116 * If we are not in special case for NPROW = NPCOL where there
117 * is no copying required
118 *
119  IF( nprow.NE.npcol ) THEN
120  lcm = ilcm( nprow, npcol )
121  rblkskip = lcm / npcol
122  cblkskip = lcm / nprow
123 *
124 * If I have part of VS, the source vector(s)
125 *
126  IF( mycol.EQ.csrc ) THEN
127 *
128  istart = 1
129 *
130 * Figure my distance from RSRC: the process in RDEST the same
131 * distance from CDEST will want my first block
132 *
133  mydist = mod( nprow+myrow-rsrc, nprow )
134  mp = numroc( m, nb, myrow, rsrc, nprow )
135  icdest = mod( cdest+mydist, npcol )
136 *
137 * Loop over all possible destination processes
138 *
139  DO 20 k = 1, cblkskip
140  jj = 1
141 *
142 * If I am not destination process
143 *
144  IF( (mycol.NE.icdest).OR.(myrow.NE.rdest) ) THEN
145 *
146 * Pack all data I own that destination needs
147 *
148  DO 10 ii = istart, mp, nb*cblkskip
149  jb = min(nb, mp-ii+1)
150  CALL clacpy( 'G', jb, n, vs(ii,1), ldvs,
151  $ work(jj), jb )
152  jj = jj + nb*n
153  10 CONTINUE
154 *
155 * Figure how many rows are to be sent and send them if
156 * necessary (NOTE: will send extra if NB > JB)
157 *
158  jj = jj - 1
159  IF( jj.GT.0 )
160  $ CALL cgesd2d( ictxt, jj, 1, work, jj, rdest,
161  $ icdest )
162 *
163  ELSE
164 *
165 * I am both source and destination, save where to start
166 * copying from for later use.
167 *
168  icpy = istart
169  END IF
170 *
171  istart = istart + nb
172  icdest = mod(icdest+nprow, npcol)
173  20 CONTINUE
174  END IF
175 *
176 * If I should receive info into VD
177 *
178  IF( myrow.EQ.rdest ) THEN
179 *
180  istart = 1
181 *
182 * Figure my distance from CDEST: the process in CSRC the same
183 * distance from RSRC will have my first block.
184 *
185  mydist = mod( npcol+mycol-cdest, npcol )
186  mq = numroc( m, nb, mycol, cdest, npcol )
187  irsrc = mod( rsrc+mydist, nprow )
188  DO 50 k = 1, rblkskip
189 *
190 * If I don't already possess the required data
191 *
192  IF( (mycol.NE.csrc).OR.(myrow.NE.irsrc) ) THEN
193 *
194 * Figure how many rows to receive, and receive them
195 * NOTE: may receive to much -- NB instead of JB
196 *
197  nblocks = (mq - istart + nb) / nb
198  jj = ((nblocks+rblkskip-1) / rblkskip)*nb
199  IF( jj.GT.0 )
200  $ CALL cgerv2d( ictxt, jj, n, work, jj, irsrc, csrc )
201 *
202 * Copy data to destination vector
203 *
204  jj = 1
205  DO 30 ii = istart, mq, nb*rblkskip
206  jb = min( nb, mq-ii+1 )
207  CALL clacpy( 'G', jb, n, work(jj), jb,
208  $ vd(ii,1), ldvd )
209  jj = jj + nb*n
210  30 CONTINUE
211 *
212 * If I am both source and destination
213 *
214  ELSE
215  jj = icpy
216  DO 40 ii = istart, mq, nb*rblkskip
217  jb = min( nb, mq-ii+1 )
218  CALL clacpy( 'G', jb, n, vs(jj,1), ldvs,
219  $ vd(ii,1), ldvd )
220  jj = jj + nb*cblkskip
221  40 CONTINUE
222  END IF
223  istart = istart + nb
224  irsrc = mod( irsrc+npcol, nprow )
225  50 CONTINUE
226  END IF
227 *
228 * If NPROW = NPCOL, there is a one-to-one correspondance between
229 * process rows and columns, so no work space or copying required
230 *
231  ELSE
232 *
233  IF( mycol.EQ.csrc ) THEN
234 *
235 * Figure my distance from RSRC: the process in RDEST the same
236 * distance from CDEST will want my piece of the vector.
237 *
238  mydist = mod( nprow+myrow-rsrc, nprow )
239  mp = numroc( m, nb, myrow, rsrc, nprow )
240  icdest = mod( cdest+mydist, npcol )
241 *
242  IF( (mycol.NE.icdest).OR.(myrow.NE.rdest) ) THEN
243  CALL cgesd2d( ictxt, mp, n, vs, ldvs, rdest, icdest )
244  ELSE
245  CALL clacpy( 'G', mp, n, vs, ldvs, vd, ldvd )
246  END IF
247  END IF
248 *
249  IF( myrow.EQ.rdest ) THEN
250 *
251 * Figure my distance from CDEST: the process in CSRC the same
252 * distance from RSRC will have my piece of the vector.
253 *
254  mydist = mod( npcol+mycol-cdest, npcol )
255  mq = numroc( m, nb, mycol, cdest, npcol )
256  irsrc = mod( rsrc+mydist, nprow )
257 *
258  IF( (myrow.NE.irsrc).OR.(mycol.NE.csrc) )
259  $ CALL cgerv2d( ictxt, mq, n, vd, ldvd, irsrc, csrc )
260 *
261  END IF
262 *
263  END IF
264 *
265  RETURN
266 *
267 * End of PCCOL2ROW
268 *
269  END
pccol2row
subroutine pccol2row(ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, CSRC, RDEST, CDEST, WORK)
Definition: pccol2row.f:3
min
#define min(A, B)
Definition: pcgemr.c:181