pzcol2row.f
      SUBROUTINE PZCOL2ROW( ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC,
     $                      CSRC, RDEST, CDEST, WORK )
*
*  -- ScaLAPACK tools routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     May 1, 1997
*
*     .. Scalar Arguments ..
      INTEGER            CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
     $                   RDEST, RSRC
*     ..
*     .. Array Arguments ..
      COMPLEX*16         VD( LDVD, * ), VS( LDVS, * ), WORK( * )
*     ..
*
*  Purpose
*  =======
*
*  Take a block of vectors with M total rows which are distributed over
*  a column of processes, and redistribute those rows over a row of
*  processes.  This routine minimizes communication by sending all
*  information it has that a given process in process row RDEST needs
*  at once.  To do this it uses the least common multiple (LCM)
*  concept: if a vector is split over a process column of P processes,
*  and its locally owned part is to be sent to a new vector distributed
*  over Q processes within a process row, then, once the process in
*  RDEST that owns a given local row has been found, that process wants
*  every ( LCM(P,Q) / P )-th block of the local vector (each block
*  being of size NB x N).
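*
*  For example (an illustrative worked case, not in the original
*  source): with P = 2 process rows and Q = 3 process columns,
*  LCM(2,3) = 6, so a source process sends every ( 6/2 ) = 3rd of its
*  local NB x N blocks to a given destination, and each destination
*  receives blocks from ( 6/3 ) = 2 distinct source processes.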
*
*  Arguments
*  =========
*
*  Rem:  MP, resp. MQ, denotes the number of local rows needed to
*  ====  store a global vector of dimension M distributed across the
*        P process rows, resp. the Q process columns, of the grid.
*
*  ICTXT   (global input) INTEGER
*          The BLACS context handle, indicating the global context of
*          the operation.  The context itself is global.
*
*  M       (global input) INTEGER
*          The number of global rows each vector has.
*
*  N       (global input) INTEGER
*          The number of vectors in the vector block.
*
*  NB      (global input) INTEGER
*          The blocking factor used to divide the rows of the vector
*          amongst the processes of a column.
*
*  VS      (local input) COMPLEX*16
*          Array of dimension (LDVS,N), the block of vectors stored on
*          process column CSRC to be put into memory VD, and stored on
*          process row RDEST.
*
*  LDVS    (local input) INTEGER
*          The leading dimension of VS, LDVS >= MAX( 1, MP ).
*
*  VD      (local output) COMPLEX*16
*          Array of dimension (LDVD,N).  On output, the contents of VS
*          stored on process row RDEST will be here.
*
*  LDVD    (local input) INTEGER
*          The leading dimension of VD, LDVD >= MAX( 1, MQ ).
*
*  RSRC    (global input) INTEGER
*          The process row the distributed block of vectors VS begins
*          on.
*
*  CSRC    (global input) INTEGER
*          The process column VS is distributed over.
*
*  RDEST   (global input) INTEGER
*          The process row to distribute VD over.
*
*  CDEST   (global input) INTEGER
*          The process column that VD begins on.
*
*  WORK    (local workspace) COMPLEX*16
*          Array of dimension (LDW).  The required size of WORK varies:
*          if( NPROW.EQ.NPCOL ) then
*             LDW = 0; WORK not accessed.
*          else
*             LCM   = least common multiple of NPROW and NPCOL
*             MP    = number of rows of VS on my process
*             NPROW = number of process rows
*             CEIL  = the ceiling of the given operation
*             LDW   = NB*N*CEIL( CEIL( MP/NB ) / (LCM/NPROW) )
*          end if
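*
*          For example (an illustrative worked case, not in the
*          original source): on a 2 x 3 grid with M = 100 and NB = 4,
*          LCM = 6 and process row 0 owns MP = 52 rows, so
*          LDW = 4*N*CEIL( CEIL( 52/4 ) / (6/2) ) = 4*N*CEIL( 13/3 )
*              = 20*N.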
*
*  =====================================================================
*
*     .. Local Scalars ..
      INTEGER            CBLKSKIP, ICPY, ICDEST, II, IRSRC, ISTART, JB,
     $                   JJ, K, LCM, MP, MQ, MYCOL, MYDIST, MYROW,
     $                   NBLOCKS, NPCOL, NPROW, RBLKSKIP
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, ZGESD2D, ZGERV2D, ZLACPY
*     ..
*     .. External Functions ..
      INTEGER            ILCM, NUMROC
      EXTERNAL           ILCM, NUMROC
*     ..
*     .. Executable Statements ..
*
      ICPY = 0
*
*     Get grid parameters.
*
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     If we are not in the special case NPROW = NPCOL, where no
*     copying is required
*
      IF( NPROW.NE.NPCOL ) THEN
         LCM = ILCM( NPROW, NPCOL )
         RBLKSKIP = LCM / NPCOL
         CBLKSKIP = LCM / NPROW
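*
*        A source process sends to CBLKSKIP = LCM/NPROW distinct
*        destination columns; a destination process receives from
*        RBLKSKIP = LCM/NPCOL distinct source rows.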
*
*        If I have part of VS, the source vector(s)
*
         IF( MYCOL.EQ.CSRC ) THEN
*
            ISTART = 1
*
*           Figure my distance from RSRC: the process in RDEST the same
*           distance from CDEST will want my first block
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
*           Loop over all possible destination processes
*
            DO 20 K = 1, CBLKSKIP
               JJ = 1
*
*              If I am not the destination process
*
               IF( ( MYCOL.NE.ICDEST ).OR.( MYROW.NE.RDEST ) ) THEN
*
*                 Pack all data I own that the destination needs
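*                 (successive blocks bound for this destination are
*                 NB*CBLKSKIP rows apart in VS)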
*
                  DO 10 II = ISTART, MP, NB*CBLKSKIP
                     JB = MIN( NB, MP-II+1 )
                     CALL ZLACPY( 'G', JB, N, VS( II, 1 ), LDVS,
     $                            WORK( JJ ), JB )
                     JJ = JJ + NB*N
   10             CONTINUE
*
*                 Figure how many rows are to be sent and send them if
*                 necessary (NOTE: will send extra if NB > JB)
*
                  JJ = JJ - 1
                  IF( JJ.GT.0 )
     $               CALL ZGESD2D( ICTXT, JJ, 1, WORK, JJ, RDEST,
     $                             ICDEST )
*
               ELSE
*
*                 I am both source and destination, save where to
*                 start copying from for later use
*
                  ICPY = ISTART
               END IF
*
               ISTART = ISTART + NB
               ICDEST = MOD( ICDEST+NPROW, NPCOL )
   20       CONTINUE
         END IF
*
*        If I should receive info into VD
*
         IF( MYROW.EQ.RDEST ) THEN
*
            ISTART = 1
*
*           Figure my distance from CDEST: the process in CSRC the
*           same distance from RSRC will have my first block
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
            DO 50 K = 1, RBLKSKIP
*
*              If I don't already possess the required data
*
               IF( ( MYCOL.NE.CSRC ).OR.( MYROW.NE.IRSRC ) ) THEN
*
*                 Figure how many rows to receive, and receive them
*                 (NOTE: may receive too much -- NB instead of JB)
*
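*                 NBLOCKS counts the local blocks remaining from
*                 ISTART; every RBLKSKIP-th of them comes from this
*                 source, rounded up to whole NB-row blocks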
                  NBLOCKS = ( MQ - ISTART + NB ) / NB
                  JJ = ( ( NBLOCKS+RBLKSKIP-1 ) / RBLKSKIP )*NB
                  IF( JJ.GT.0 )
     $               CALL ZGERV2D( ICTXT, JJ, N, WORK, JJ, IRSRC,
     $                             CSRC )
*
*                 Copy data to destination vector
*
                  JJ = 1
                  DO 30 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL ZLACPY( 'G', JB, N, WORK( JJ ), JB,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*N
   30             CONTINUE
*
*              If I am both source and destination
*
               ELSE
                  JJ = ICPY
                  DO 40 II = ISTART, MQ, NB*RBLKSKIP
                     JB = MIN( NB, MQ-II+1 )
                     CALL ZLACPY( 'G', JB, N, VS( JJ, 1 ), LDVS,
     $                            VD( II, 1 ), LDVD )
                     JJ = JJ + NB*CBLKSKIP
   40             CONTINUE
               END IF
               ISTART = ISTART + NB
               IRSRC = MOD( IRSRC+NPCOL, NPROW )
   50       CONTINUE
         END IF
*
*     If NPROW = NPCOL, there is a one-to-one correspondence between
*     process rows and columns, so no workspace or copying is required
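*     (with P = Q, LCM(P,Q) = P, so a process's entire local array
*     belongs to a single destination and one send or copy suffices)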
*
      ELSE
*
         IF( MYCOL.EQ.CSRC ) THEN
*
*           Figure my distance from RSRC: the process in RDEST the
*           same distance from CDEST will want my piece of the vector
*
            MYDIST = MOD( NPROW+MYROW-RSRC, NPROW )
            MP = NUMROC( M, NB, MYROW, RSRC, NPROW )
            ICDEST = MOD( CDEST+MYDIST, NPCOL )
*
            IF( ( MYCOL.NE.ICDEST ).OR.( MYROW.NE.RDEST ) ) THEN
               CALL ZGESD2D( ICTXT, MP, N, VS, LDVS, RDEST, ICDEST )
            ELSE
               CALL ZLACPY( 'G', MP, N, VS, LDVS, VD, LDVD )
            END IF
         END IF
*
         IF( MYROW.EQ.RDEST ) THEN
*
*           Figure my distance from CDEST: the process in CSRC the
*           same distance from RSRC will have my piece of the vector
*
            MYDIST = MOD( NPCOL+MYCOL-CDEST, NPCOL )
            MQ = NUMROC( M, NB, MYCOL, CDEST, NPCOL )
            IRSRC = MOD( RSRC+MYDIST, NPROW )
*
            IF( ( MYROW.NE.IRSRC ).OR.( MYCOL.NE.CSRC ) )
     $         CALL ZGERV2D( ICTXT, MQ, N, VD, LDVD, IRSRC, CSRC )
*
         END IF
*
      END IF
*
      RETURN
*
*     End of PZCOL2ROW
*
      END
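A minimal calling sketch (illustrative only, not part of ScaLAPACK): the hypothetical driver below assumes it is launched on exactly 6 processes, so the 2 x 3 grid exercises the NPROW.NE.NPCOL path.

      PROGRAM COL2RX
*     Illustrative driver (assumption: exactly 6 processes, 2 x 3 grid)
      INTEGER            ICTXT, IAM, NPROCS, NPROW, NPCOL, MYROW,
     $                   MYCOL, M, N, NB
      PARAMETER          ( M = 16, N = 2, NB = 2 )
      COMPLEX*16         VS( M, N ), VD( M, N ), WORK( M*N )
      EXTERNAL           BLACS_PINFO, BLACS_GET, BLACS_GRIDINIT,
     $                   BLACS_GRIDINFO, BLACS_GRIDEXIT, BLACS_EXIT,
     $                   PZCOL2ROW
*
      CALL BLACS_PINFO( IAM, NPROCS )
      CALL BLACS_GET( -1, 0, ICTXT )
      NPROW = 2
      NPCOL = 3
      CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
      CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*     ... fill the local rows of VS on process column 0 here ...
*     Redistribute rows spread over process column 0 ( RSRC = 0,
*     CSRC = 0 ) onto process row 0 ( RDEST = 0, CDEST = 0 ).
      CALL PZCOL2ROW( ICTXT, M, N, NB, VS, M, VD, M, 0, 0, 0, 0,
     $                WORK )
      CALL BLACS_GRIDEXIT( ICTXT )
      CALL BLACS_EXIT( 0 )
      END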