pdcol2row.f
      SUBROUTINE PDCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
     $                      CSRC, RDEST, CDEST, WORK )
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  Take a block of vectors with M total rows which are distributed over
*  a column of processes, and redistribute those rows over a row of
*  processes.  This routine minimizes communication by sending all of
*  the information it has that a given process in row RDEST needs in a
*  single message.  To do this it uses the least common multiple (LCM)
*  concept: if the vector is split over a process column of P
*  processes, and the part of it that I own is to be sent to a new
*  vector distributed over Q processes within a process row, then once
*  I find the process in row RDEST that owns the row of the vector I am
*  currently looking at, that same process will also want every
*  ( LCM(P,Q) / P )-th subsequent block of my vector (each block being
*  of size NB x N).
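*
*  For example, with P = 2 process rows and Q = 3 process columns,
*  LCM(2,3) = 6: each source process packs every ( 6 / 2 ) = 3rd of
*  its local NB x N blocks for a given destination, and cycles through
*  LCM / P = 3 distinct destination columns in all.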
*
*  Arguments
*  =========
*
*  Rem:  MP, resp. MQ, denotes the number of local rows necessary to
*  ====  store a global vector of dimension M distributed over the P
*        process rows, resp. over the Q process columns.
*
*  ICTXT   (global input) INTEGER
*          The BLACS context handle, indicating the global context of
*          the operation.  The handle itself is global.
*
*  M       (global input) INTEGER
*          The number of global rows each vector has.
*
*  N       (global input) INTEGER
*          The number of vectors in the vector block.
*
*  NB      (global input) INTEGER
*          The blocking factor used to divide the rows of the vector
*          amongst the processes of a column.
*
*  VS      (local input) DOUBLE PRECISION
*          Array of dimension ( LDVS, N ), the block of vectors,
*          distributed over process column CSRC, that is to be copied
*          into VD and distributed over process row RDEST.
*
*  LDVS    (local input) INTEGER
*          The leading dimension of VS, LDVS >= MAX( 1, MP ).
*
*  VD      (local output) DOUBLE PRECISION
*          Array of dimension ( LDVD, N ): on output, on process row
*          RDEST, it contains the redistributed contents of VS.
*
*  LDVD    (local input) INTEGER
*          The leading dimension of VD, LDVD >= MAX( 1, MQ ).
*
*  RSRC    (global input) INTEGER
*          The process row the distributed block of vectors VS begins
*          on.
*
*  CSRC    (global input) INTEGER
*          The process column VS is distributed over.
*
*  RDEST   (global input) INTEGER
*          The process row to distribute VD over.
*
*  CDEST   (global input) INTEGER
*          The process column that VD begins on.
*
*  WORK    (local workspace) DOUBLE PRECISION
*          Array of dimension ( LDW ); the required size of WORK
*          varies:
*          if( NPROW.EQ.NPCOL ) then
*             LDW = 0; WORK not accessed.
*          else
*             LCM   = least common multiple of the numbers of process
*                     rows and columns.
*             MP    = number of rows of VS on my process.
*             NPROW = number of process rows.
*             CEIL  = the ceiling of the given division.
*             LDW   = NB*N*CEIL( CEIL( MP/NB ) / ( LCM/NPROW ) )
*          end if
*
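*          For example, with NPROW = 2, NPCOL = 3, NB = 8, N = 2 and
*          MP = 20:  LCM = 6, CEIL( 20/8 ) = 3, and
*          LDW = 8*2*CEIL( 3 / ( 6/2 ) ) = 16.
*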
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DGESD2D, DGERV2D, DLACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
*     .. Initialize Variables ..
*
      ICPY = 0
*
*     Get grid parameters.
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in the special case NPROW = NPCOL, for which no
*     copying is required
*
      IF( NPROW.NE.NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
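*
*        For example, with NPROW = 2 and NPCOL = 3, LCM = 6, so
*        RBLKSKIP = 2 (each destination receives from 2 distinct
*        source rows) and CBLKSKIP = 3 (each source sends to 3
*        distinct destination columns).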
*
*        If I have part of VS, the source vector(s)
*
         IF( MYCOL.EQ.CSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from RSRC: the process in RDEST at the
*           same distance from CDEST will want my first block
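*           ( e.g., with RSRC = 0, MYROW = 2 and NPROW = 4, MYDIST = 2,
*           so my first block goes to process column
*           MOD( CDEST+2, NPCOL ) of row RDEST )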
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, CBLKSKIP
               JJ = 1
*
*              If I am not the destination process
*
               IF( ( MYCOL.NE.ICDEST ).OR.( MYROW.NE.RDEST ) ) THEN
*
*                 Pack all data I own that the destination needs
*
                  DO 10 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL DLACPY( 'G', JB, N, VS( II, 1 ), LDVS,
     $                            WORK( JJ ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary (NOTE: will send extra if NB > JB)
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL DGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST,
     $                             ICDEST )
*
               ELSE
*
*                 I am both source and destination; save where to start
*                 copying from for later use.
*
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               ICDEST = MOD( ICDEST+NPROW, NPCOL )
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYROW.EQ.RDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from CDEST: the process in CSRC at the
*           same distance from RSRC will have my first block.
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
            DO 50 K = 1, RBLKSKIP
*
*              If I don't already possess the required data
*
               IF( ( MYCOL.NE.CSRC ).OR.( MYROW.NE.IRSRC ) ) THEN
*
*                 Figure how many rows to receive, and receive them
*                 ( NOTE: may receive too much -- NB rows instead of
*                 JB )
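*                 ( e.g., ISTART = 1, MQ = 20, NB = 8, RBLKSKIP = 2
*                 gives NBLOCKS = 3 and JJ = CEIL( 3/2 )*8 = 16 rows )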
*
                  NBLOCKS = ( MQ-ISTART+NB ) / NB
                  JJ = ( ( NBLOCKS+RBLKSKIP-1 ) / RBLKSKIP )*NB
                  IF( JJ.GT.0 )
     $               CALL DGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC, CSRC )
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL DLACPY( 'G', JB, N, WORK( JJ ), JB,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*              If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL DLACPY( 'G', JB, N, VS( JJ, 1 ), LDVS,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*CBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               IRSRC = MOD( IRSRC+NPCOL, NPROW )
   50       CONTINUE
         END IF
*
*     If NPROW = NPCOL, there is a one-to-one correspondence between
*     process rows and columns, so no work space or copying is required
*
      ELSE
*
         IF( MYCOL.EQ.CSRC ) THEN
*
*           Figure my distance from RSRC: the process in RDEST at the
*           same distance from CDEST will want my piece of the vector.
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
            IF( ( MYCOL.NE.ICDEST ).OR.( MYROW.NE.RDEST ) ) THEN
               CALL DGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST )
            ELSE
               CALL DLACPY( 'G', MP, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
*
         IF( MYROW.EQ.RDEST ) THEN
*
*           Figure my distance from CDEST: the process in CSRC at the
*           same distance from RSRC will have my piece of the vector.
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
*
            IF( ( MYROW.NE.IRSRC ).OR.( MYCOL.NE.CSRC ) )
     $         CALL DGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC )
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PDCOL2ROW
*
      END
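*
*     The driver below is a minimal usage sketch, not part of the
*     original file: it redistributes an M x N block of vectors from
*     process column 0 to process row 0 on an assumed 2 x 3 process
*     grid (run with at least 6 processes).  Grid shape, block size
*     and dimensions are illustrative assumptions; WORK is simply
*     over-allocated rather than sized by the LDW formula above.
*
      PROGRAM P2RDRV
*     .. Parameters ..
      INTEGER            M, N, NB, NPROW, NPCOL
      PARAMETER          ( M = 20, N = 2, NB = 8, NPROW = 2,
     $                   NPCOL = 3 )
*     .. Local Scalars ..
      INTEGER            I, IAM, ICTXT, J, MP, MYCOL, MYROW, NPC,
     $                   NPR, NPROCS
*     .. Local Arrays ..
      DOUBLE PRECISION   VS( M, N ), VD( M, N ), WORK( M*N )
*     .. External Functions ..
      INTEGER            NUMROC
      EXTERNAL           NUMROC
*     .. External Subroutines ..
      EXTERNAL           BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT,
     $                   BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO,
     $                   PDCOL2ROW
*     ..
*     Set up an NPROW x NPCOL process grid.
*
      CALL BLACS_PINFO( IAM, NPROCS )
      CALL BLACS_GET( -1, 0, ICTXT )
      CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
      CALL BLACS_GRIDINFO( ICTXT, NPR, NPC, MYROW, MYCOL )
      IF( MYROW.GE.0 ) THEN
*
*        Fill the local MP x N piece of VS on process column 0 with
*        recognizable dummy data.
*
         MP = NUMROC( M, NB, MYROW, 0, NPR )
         IF( MYCOL.EQ.0 ) THEN
            DO 20 J = 1, N
               DO 10 I = 1, MP
                  VS( I, J ) = DBLE( I + ( J-1 )*M )
   10          CONTINUE
   20       CONTINUE
         END IF
*
*        Redistribute VS (over process column 0) into VD (over
*        process row 0), then report one redistributed entry.
*
         CALL PDCOL2ROW( ICTXT, M, N, NB, VS, M, VD, M, 0, 0, 0, 0,
     $                   WORK )
         IF( MYROW.EQ.0 .AND. MYCOL.EQ.0 )
     $      WRITE( *, * ) 'VD( 1, 1 ) = ', VD( 1, 1 )
         CALL BLACS_GRIDEXIT( ICTXT )
      END IF
      CALL BLACS_EXIT( 0 )
      END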