ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pscol2row.f
Go to the documentation of this file.
1  SUBROUTINE pscol2row( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
2  $ CSRC, RDEST, CDEST, WORK)
3 *
4 * -- ScaLAPACK tools routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
11  $ rdest, rsrc
12 * ..
13 * .. Array Arguments ..
14  REAL VD( LDVD, * ), VS( LDVS, * ), WORK( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * Take a block of vectors with M total rows which are distributed over
21 * a column of processes, and distribute those rows over a row of
22 * processes. This routine minimizes communication by sending all
23 * information it has that a given process in the RDEST needs at once.
24 * To do this it uses the least common multiple (LCM) concept. This is
25 * simply the realization that if I have part of a vector split over a
26 * process column consisting of P processes, and I want to send all of
27 * that vector that I own to a new vector distributed over Q processes
28 * within a process row, that after I find the process in RDEST that
29 * owns the row of the vector I'm currently looking at, he will want
30 * every ( (LCM(P,Q) / P ) block of my vector (the block being of size
31 * NB x N).
32 *
33 * Arguments
34 * =========
35 *
36 * Rem: MP, resp. NQ, denotes the number of local rows, resp. local
37 * ==== columns, necessary to store a global vector of dimension M
38 * across P processes, resp. N over Q processes.
39 *
40 * ICTXT (global input) INTEGER
41 * The BLACS context handle, indicating the global context of
42 * the operation. The context itself is global.
43 *
44 * M (global input) INTEGER
45 * The number of global rows each vector has.
46 *
47 * N (global input) INTEGER
48 * The number of vectors in the vector block.
49 *
50 * NB (global input) INTEGER
51 * The blocking factor used to divide the rows of the vector
52 * amongst the processes of a column.
53 *
54 * VS (local input) REAL
55 * Array of dimension (LDVS,N), the block of vectors stored on
56 * process column CSRC to be put into memory VD, and stored
57 * on process row RDEST.
58 *
59 * LDVS (local input) INTEGER
60 * The leading dimension of VS, LDVS >= MAX( 1, MP ).
61 *
62 * VD (local output) REAL
63 * Array of dimension (LDVD,N), on output, the contents of VS
64 * stored on process row RDEST will be here.
65 *
66 * LDVD (local input) INTEGER
67 * The leading dimension of VD, LDVD >= MAX( 1, MQ ).
68 *
69 * RSRC (global input) INTEGER
70 * The process row the distributed block of vectors VS begins
71 * on.
72 *
73 * CSRC (global input) INTEGER
74 * The process column VS is distributed over.
75 *
76 * RDEST (global input) INTEGER
77 * The process row to distribute VD over.
78 *
79 * CDEST (global input) INTEGER
80 * The process column that VD begins on.
81 *
82 * WORK (local workspace) REAL
83 * Array of dimension (LDW), the required size of work varies:
84 * if( nprow.eq.npcol ) then
85 * LDW = 0; WORK not accessed.
86 * else
87 * lcm = least common multiple of process rows and columns.
88 * Mp = number of rows of VS on my process.
89 * nprow = number of process rows
90 * CEIL = the ceiling of given operation
91 * LDW = NB*N*CEIL( CEIL( Mp/NB )/(LCM/nprow) )
92 * end if
93 *
94 * =====================================================================
95 *
96 * .. Local Scalars ..
97  INTEGER CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
98  $ jj, k, lcm, mp, mq, mycol, mydist, myrow,
99  $ nblocks, npcol, nprow, rblkskip
100 * ..
101 * .. External Subroutines ..
102  EXTERNAL blacs_gridinfo, sgesd2d, sgerv2d, slacpy
103 * ..
104 * .. External Functions ..
105  INTEGER ILCM, NUMROC
106  EXTERNAL ilcm, numroc
107 * ..
108 * .. Executable Statements ..
109 *
110 *
111 * .. Initialize Variables ..
112 *
113  icpy = 0
114 *
115 * Get grid parameters.
116 *
117  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
118 *
119 * If we are not in special case for NPROW = NPCOL where there
120 * is no copying required
121 *
122  IF( nprow.NE.npcol ) THEN
123  lcm = ilcm( nprow, npcol )
124  rblkskip = lcm / npcol
125  cblkskip = lcm / nprow
126 *
127 * If I have part of VS, the source vector(s)
128 *
129  IF( mycol.EQ.csrc ) THEN
130 *
131  istart = 1
132 *
133 * Figure my distance from RSRC: the process in RDEST the same
134 * distance from CDEST will want my first block
135 *
136  mydist = mod( nprow+myrow-rsrc, nprow )
137  mp = numroc( m, nb, myrow, rsrc, nprow )
138  icdest = mod( cdest+mydist, npcol )
139 *
140 * Loop over all possible destination processes
141 *
142  DO 20 k = 1, cblkskip
143  jj = 1
144 *
145 * If I am not destination process
146 *
147  IF( (mycol.NE.icdest).OR.(myrow.NE.rdest) ) THEN
148 *
149 * Pack all data I own that destination needs
150 *
151  DO 10 ii = istart, mp, nb*cblkskip
152  jb = min(nb, mp-ii+1)
153  CALL slacpy( 'G', jb, n, vs(ii,1), ldvs,
154  $ work(jj), jb )
155  jj = jj + nb*n
156  10 CONTINUE
157 *
158 * Figure how many rows are to be sent and send them if
159 * necessary (NOTE: will send extra if NB > JB)
160 *
161  jj = jj - 1
162  IF( jj.GT.0 )
163  $ CALL sgesd2d( ictxt, jj, 1, work, jj, rdest,
164  $ icdest )
165 *
166  ELSE
167 *
168 * I am both source and destination, save where to start
169 * copying from for later use.
170 *
171  icpy = istart
172  END IF
173 *
174  istart = istart + nb
175  icdest = mod(icdest+nprow, npcol)
176  20 CONTINUE
177  END IF
178 *
179 * If I should receive info into VD
180 *
181  IF( myrow.EQ.rdest ) THEN
182 *
183  istart = 1
184 *
185 * Figure my distance from CDEST: the process in CSRC the same
186 * distance from RSRC will have my first block.
187 *
188  mydist = mod( npcol+mycol-cdest, npcol )
189  mq = numroc( m, nb, mycol, cdest, npcol )
190  irsrc = mod( rsrc+mydist, nprow )
191  DO 50 k = 1, rblkskip
192 *
193 * If I don't already possess the required data
194 *
195  IF( (mycol.NE.csrc).OR.(myrow.NE.irsrc) ) THEN
196 *
197 * Figure how many rows to receive, and receive them
198 * NOTE: may receive to much -- NB instead of JB
199 *
200  nblocks = (mq - istart + nb) / nb
201  jj = ((nblocks+rblkskip-1) / rblkskip)*nb
202  IF( jj.GT.0 )
203  $ CALL sgerv2d( ictxt, jj, n, work, jj, irsrc, csrc )
204 *
205 * Copy data to destination vector
206 *
207  jj = 1
208  DO 30 ii = istart, mq, nb*rblkskip
209  jb = min( nb, mq-ii+1 )
210  CALL slacpy( 'G', jb, n, work(jj), jb,
211  $ vd(ii,1), ldvd )
212  jj = jj + nb*n
213  30 CONTINUE
214 *
215 * If I am both source and destination
216 *
217  ELSE
218  jj = icpy
219  DO 40 ii = istart, mq, nb*rblkskip
220  jb = min( nb, mq-ii+1 )
221  CALL slacpy( 'G', jb, n, vs(jj,1), ldvs,
222  $ vd(ii,1), ldvd )
223  jj = jj + nb*cblkskip
224  40 CONTINUE
225  END IF
226  istart = istart + nb
227  irsrc = mod( irsrc+npcol, nprow )
228  50 CONTINUE
229  END IF
230 *
231 * If NPROW = NPCOL, there is a one-to-one correspondance between
232 * process rows and columns, so no work space or copying required
233 *
234  ELSE
235 *
236  IF( mycol.EQ.csrc ) THEN
237 *
238 * Figure my distance from RSRC: the process in RDEST the same
239 * distance from CDEST will want my piece of the vector.
240 *
241  mydist = mod( nprow+myrow-rsrc, nprow )
242  mp = numroc( m, nb, myrow, rsrc, nprow )
243  icdest = mod( cdest+mydist, npcol )
244 *
245  IF( (mycol.NE.icdest).OR.(myrow.NE.rdest) ) THEN
246  CALL sgesd2d( ictxt, mp, n, vs, ldvs, rdest, icdest )
247  ELSE
248  CALL slacpy( 'G', mp, n, vs, ldvs, vd, ldvd )
249  END IF
250  END IF
251 *
252  IF( myrow.EQ.rdest ) THEN
253 *
254 * Figure my distance from CDEST: the process in CSRC the same
255 * distance from RSRC will have my piece of the vector.
256 *
257  mydist = mod( npcol+mycol-cdest, npcol )
258  mq = numroc( m, nb, mycol, cdest, npcol )
259  irsrc = mod( rsrc+mydist, nprow )
260 *
261  IF( (myrow.NE.irsrc).OR.(mycol.NE.csrc) )
262  $ CALL sgerv2d( ictxt, mq, n, vd, ldvd, irsrc, csrc )
263 *
264  END IF
265 *
266  END IF
267 *
268  RETURN
269 *
270 * End of PSCOL2ROW
271 *
272  END
pscol2row
subroutine pscol2row(ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, CSRC, RDEST, CDEST, WORK)
Definition: pscol2row.f:3
min
#define min(A, B)
Definition: pcgemr.c:181