SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pirow2col.f
Go to the documentation of this file.
1 SUBROUTINE pirow2col( ICTXT, M, N, NB, VS, LDVS, VD, LDVD,
2 $ RSRC, CSRC, RDEST, CDEST, WORK)
3*
4* -- ScaLAPACK tools routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER CDEST, CSRC, ICTXT, LDVD, LDVS, M, N, NB,
11 $ rdest, rsrc
12* ..
13* .. Array Arguments ..
14 INTEGER VD( LDVD, * ), VS( LDVS, * ), WORK( * )
15* ..
16*
17* Purpose
18* =======
19*
20* Take a block of vectors with M total rows which are distributed over
21* a row of processes, and distribute those rows over a column of
22* processes. This routine minimizes communication by sending all
23* information it has that a given process in the CDEST needs at once.
24* To do this it uses the least common multiple (LCM) concept. This is
25* simply the realization that if I have part of a vector split over a
26* process row consisting of Q processes, and I want to send all of that
27* vector that I own to a new vector distributed over P processes within
28* a process column, that after I find the process in RDEST that owns
29* the row of the vector I'm currently looking at, he will want every
30* ( (LCM(P,Q)/Q ) block of my vector (the block being of size NB x N).
31*
32* Arguments
33* =========
34*
35* Rem: MP, resp. NQ, denotes the number of local rows, resp. local
36* ==== columns, necessary to store a global vector of dimension M
37* across P processes, resp. N over Q processes.
38*
39* ICTXT (global input) INTEGER
40* The BLACS context handle, indicating the global context of
41* the operation. The context itself is global.
42*
43* M (global input) INTEGER
44* The number of global rows each vector has.
45*
46* N (global input) INTEGER
47* The number of vectors in the vector block
48*
49* NB (global input) INTEGER
50* The blocking factor used to divide the rows of the vector
51* amongst the processes of a row.
52*
53* VS (local input) @(typec)
54* Array of dimension (LDVS,N), the block of vectors stored on
55* process row RSRC to be put into memory VD, and stored on
56* process column CDEST.
57*
58* LDVS (local input) INTEGER
59* The leading dimension of VS.
60*
61* VD (local output) @(typec)
62* Array of dimension (LDVD,N), on output, the contents of VD
63* stored on process column CDEST will be here.
64*
65* LDVD (local input) INTEGER
66* The leading dimension of VD.
67*
68* RSRC (global input) INTEGER
69* The process row VS is distributed over.
70*
71* CSRC (global input) INTEGER
72* The process column the distributed block of vectors VS
73* begins on.
74*
75* RDEST (global input) INTEGER
76* The process row that VD begins on.
77*
78* CDEST (global input) INTEGER
79* The process column to distribute VD over.
80*
81* WORK (local workspace) @(typec)
82* Array, dimension (LDW). The required size of work varies:
83* if( nprow.eq.npcol ) then
84* LDW = 0; WORK not accessed.
85* else
86* lcm = least common multiple of process rows and columns.
87* Mq = number of rows of VS on my process.
88* npcol = number of process columns
89* CEIL = the ceiling of given operation
90* LDW = NB*N*CEIL( CEIL( Mq/NB )/(LCM/npcol) )
91* end if
92*
93* =====================================================================
94*
95* .. Local Scalars ..
96 INTEGER CBLKSKIP, ICPY, II, ISTART, ICSRC, IRDEST, JB,
97 $ jj, k, lcm, mp, mq, mycol, mydist, myrow,
98 $ nblocks, npcol, nprow, rblkskip
99* ..
100* .. External Subroutines ..
101 EXTERNAL blacs_gridinfo, igesd2d, igerv2d, ilacpy
102* ..
103* .. External Functions ..
104 INTEGER ILCM, NUMROC
105 EXTERNAL ilcm, numroc
106* ..
107* .. Executable Statements ..
108*
109*
110* .. Initialize Variables ..
111*
112 icpy = 0
113*
114 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
115*
116* If we are not in special case for NPROW = NPCOL where there is no
117* copying required
118*
119 IF( nprow .NE. npcol ) THEN
120 lcm = ilcm( nprow, npcol )
121 rblkskip = lcm / npcol
122 cblkskip = lcm / nprow
123*
124* If I have part of VS, the source vector(s)
125*
126 IF( myrow.EQ.rsrc ) THEN
127*
128 istart = 1
129*
130* Figure my distance from CSRC: the process in CDEST the same
131* distance from RDEST will want my first block
132*
133 mydist = mod( npcol+mycol-csrc, npcol )
134 mq = numroc( m, nb, mycol, csrc, npcol )
135 irdest = mod( rdest+mydist, nprow )
136*
137* Loop over all possible destination processes
138*
139 DO 20 k = 1, rblkskip
140 jj = 1
141*
142* If I am not destination process
143*
144 IF( (myrow.NE.irdest).OR.(mycol.NE.cdest) ) THEN
145*
146* Pack all data I own that destination needs
147*
148 DO 10 ii = istart, mq, nb*rblkskip
149 jb = min( nb, mq-ii+1 )
150 CALL ilacpy( 'G', jb, n, vs(ii,1), ldvs,
151 $ work(jj), jb )
152 jj = jj + nb*n
153 10 CONTINUE
154*
155* Figure how many rows are to be sent and send them if
156* necessary, NOTE: will send extra if NB > JB
157*
158 jj = jj - 1
159 IF( jj.GT.0 )
160 $ CALL igesd2d( ictxt, jj, 1, work, jj, irdest,
161 $ cdest )
162*
163* I am both source and destination, save where to start
164* copying from for later use
165*
166 ELSE
167 icpy = istart
168 END IF
169*
170 istart = istart + nb
171 irdest = mod( irdest+npcol, nprow )
172 20 CONTINUE
173 END IF
174*
175* If I should receive info into VD
176*
177 IF( mycol.EQ.cdest ) THEN
178*
179 istart = 1
180*
181* Figure my distance from CDEST: the process in CSRC the same
182* distance from RSRC will have my first block
183*
184 mydist = mod( nprow+myrow-rdest, nprow )
185 mp = numroc( m, nb, myrow, rdest, nprow )
186 icsrc = mod( csrc+mydist, npcol )
187*
188* Loop over all sending processes
189*
190 DO 50 k = 1, cblkskip
191*
192* If I don't already possess the required data
193*
194 IF( (myrow.NE.rsrc).OR.(mycol.NE.icsrc) ) THEN
195*
196* Figure how many rows to receive, and receive them
197* NOTE: may receive to much -- NB instead of JB
198*
199 nblocks = (mp - istart + nb) / nb
200 jj = ((nblocks+cblkskip-1) / cblkskip)*nb
201 IF( jj.GT.0 )
202 $ CALL igerv2d( ictxt, jj, n, work, jj, rsrc, icsrc )
203*
204* Copy data to destination vector
205*
206 jj = 1
207 DO 30 ii = istart, mp, nb*cblkskip
208 jb = min( nb, mp-ii+1 )
209 CALL ilacpy( 'G', jb, n, work(jj), jb, vd(ii,1),
210 $ ldvd )
211 jj = jj + nb*n
212 30 CONTINUE
213*
214* If I am both source and destination
215*
216 ELSE
217 jj = icpy
218 DO 40 ii = istart, mp, nb*cblkskip
219 jb = min( nb, mp-ii+1 )
220 CALL ilacpy( 'G', jb, n, vs(jj,1), ldvs, vd(ii,1),
221 $ ldvd )
222 jj = jj + nb*rblkskip
223 40 CONTINUE
224 END IF
225 istart = istart + nb
226 icsrc = mod( icsrc+nprow, npcol )
227 50 CONTINUE
228 END IF
229*
230* if NPROW = NPCOL, there is a one-to-one correspondance between
231* process rows and columns, so no work space or copying required
232*
233 ELSE
234*
235 IF( myrow.EQ.rsrc ) THEN
236*
237* Figure my distance from CSRC: the process in CDEST the same
238* distance from RDEST will want my piece of the vector
239*
240 mydist = mod( npcol+mycol-csrc, npcol )
241 mq = numroc( m, nb, mycol, csrc, npcol )
242 irdest = mod( rdest+mydist, nprow )
243 IF( (myrow.NE.irdest).OR.(mycol.NE.cdest) ) THEN
244 CALL igesd2d( ictxt, mq, n, vs, ldvs, irdest, cdest )
245 ELSE
246 CALL ilacpy( 'G', mq, n, vs, ldvs, vd, ldvd )
247 END IF
248 END IF
249 IF( mycol.EQ.cdest ) THEN
250*
251* Figure my distance from RDEST: the process in RSRC the same
252* distance from CSRC will have my piece of the vector
253*
254 mydist = mod( nprow+myrow-rdest, nprow )
255 mp = numroc( m, nb, myrow, rdest, nprow )
256 icsrc = mod( csrc+mydist, npcol )
257 IF( (mycol.NE.icsrc).OR.(myrow.NE. rsrc) )
258 $ CALL igerv2d( ictxt, mp, n, vd, ldvd, rsrc, icsrc )
259 END IF
260 END IF
261*
262 RETURN
263*
264* End of PIROW2COL
265*
266 END
subroutine ilacpy(uplo, m, n, a, lda, b, ldb)
Definition ilacpy.f:2
#define min(A, B)
Definition pcgemr.c:181
subroutine pirow2col(ictxt, m, n, nb, vs, ldvs, vd, ldvd, rsrc, csrc, rdest, cdest, work)
Definition pirow2col.f:3