SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pclaevswp.f
Go to the documentation of this file.
1*
2*
3 SUBROUTINE pclaevswp( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY,
4 $ RWORK, LRWORK )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* April 15, 1997
10*
11* .. Scalar Arguments ..
12 INTEGER IZ, JZ, LDZI, LRWORK, N
13* ..
14* .. Array Arguments ..
15 INTEGER DESCZ( * ), KEY( * ), NVS( * )
16 REAL RWORK( * ), ZIN( LDZI, * )
17 COMPLEX Z( * )
18* ..
19*
20* Purpose
21* =======
22*
23* PCLAEVSWP moves the eigenvectors (potentially unsorted) from
24* where they are computed, to a ScaLAPACK standard block cyclic
25* array, sorted so that the corresponding eigenvalues are sorted.
26*
27* Notes
28* =====
29*
30*
31* Each global data object is described by an associated description
32* vector. This vector stores the information required to establish
33* the mapping between an object element and its corresponding process
34* and memory location.
35*
36* Let A be a generic term for any 2D block cyclicly distributed array.
37* Such a global array has an associated description vector DESCA.
38* In the following comments, the character _ should be read as
39* "of the global array".
40*
41* NOTATION STORED IN EXPLANATION
42* --------------- -------------- --------------------------------------
43* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
44* DTYPE_A = 1.
45* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
46* the BLACS process grid A is distribu-
47* ted over. The context itself is glo-
48* bal, but the handle (the integer
49* value) may vary.
50* M_A (global) DESCA( M_ ) The number of rows in the global
51* array A.
52* N_A (global) DESCA( N_ ) The number of columns in the global
53* array A.
54* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
55* the rows of the array.
56* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
57* the columns of the array.
58* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
59* row of the array A is distributed.
60* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
61* first column of the array A is
62* distributed.
63* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
64* array. LLD_A >= MAX(1,LOCr(M_A)).
65*
66* Let K be the number of rows or columns of a distributed matrix,
67* and assume that its process grid has dimension p x q.
68* LOCr( K ) denotes the number of elements of K that a process
69* would receive if K were distributed over the p processes of its
70* process column.
71* Similarly, LOCc( K ) denotes the number of elements of K that a
72* process would receive if K were distributed over the q processes of
73* its process row.
74* The values of LOCr() and LOCc() may be determined via a call to the
75* ScaLAPACK tool function, NUMROC:
76* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
77* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
78* An upper bound for these quantities may be computed by:
79* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
80* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
81*
82*
83* Arguments
84* =========
85*
86* NP = the number of rows local to a given process.
87* NQ = the number of columns local to a given process.
88*
89* N (global input) INTEGER
90* The order of the matrix A. N >= 0.
91*
92* ZIN (local input) REAL array,
93* dimension ( LDZI, NVS(iam) )
94* The eigenvectors on input. Each eigenvector resides entirely
95* in one process. Each process holds a contiguous set of
96* NVS(iam) eigenvectors. The first eigenvector which the
97* process holds is: sum for i=[0,iam-1) of NVS(i)
98*
99* LDZI (locl input) INTEGER
100* leading dimension of the ZIN array
101*
102* Z (local output) COMPLEX array
103* global dimension (N, N), local dimension (DESCZ(DLEN_), NQ)
104* The eigenvectors on output. The eigenvectors are distributed
105* in a block cyclic manner in both dimensions, with a
106* block size of NB.
107*
108* IZ (global input) INTEGER
109* Z's global row index, which points to the beginning of the
110* submatrix which is to be operated on.
111*
112* JZ (global input) INTEGER
113* Z's global column index, which points to the beginning of
114* the submatrix which is to be operated on.
115*
116* DESCZ (global and local input) INTEGER array of dimension DLEN_.
117* The array descriptor for the distributed matrix Z.
118*
119* NVS (global input) INTEGER array, dimension( nprocs+1 )
120* nvs(i) = number of processes
121* number of eigenvectors held by processes [0,i-1)
122* nvs(1) = number of eigen vectors held by [0,1-1) == 0
123* nvs(nprocs+1) = number of eigen vectors held by [0,nprocs) ==
124* total number of eigenvectors
125*
126* KEY (global input) INTEGER array, dimension( N )
127* Indicates the actual index (after sorting) for each of the
128* eigenvectors.
129*
130* RWORK (local workspace) REAL array, dimension (LRWORK)
131*
132* LRWORK (local input) INTEGER dimension of RWORK
133* .. Parameters ..
134 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
135 $ mb_, nb_, rsrc_, csrc_, lld_
136 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
137 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
138 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
139* ..
140* .. Local Scalars ..
141 INTEGER CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J,
142 $ maxi, maxii, mini, minii, mycol, myrow, nb,
143 $ nbufsize, npcol, nprocs, nprow, pcol, recvcol,
144 $ recvfrom, recvrow, sendcol, sendrow, sendto
145* ..
146* .. External Functions ..
147 INTEGER INDXG2L, INDXG2P
148 EXTERNAL indxg2l, indxg2p
149* ..
150* .. External Subroutines ..
151 EXTERNAL blacs_gridinfo, sgerv2d, sgesd2d
152* ..
153* .. Intrinsic Functions ..
154 INTRINSIC cmplx, max, min, mod
155* ..
156* .. Executable Statements ..
157* This is just to keep ftnchek happy
158 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
159 $ rsrc_.LT.0 )RETURN
160 CALL blacs_gridinfo( descz( ctxt_ ), nprow, npcol, myrow, mycol )
161 iam = myrow + mycol*nprow
162 iam = myrow*npcol + mycol
163*
164 nb = descz( mb_ )
165*
166 nprocs = nprow*npcol
167*
168* If PxSTEIN operates on a sub-matrix of a global matrix, the
169* key [] that contains the indicies of the eigenvectors is refe-
170* renced to the dimensions of the sub-matrix and not the global
171* distrubited matrix. Because of this, PxLAEVSWP will incorrectly
172* map the eigenvectors to the global eigenvector matrix, Z, unless
173* the key[] elements are shifted as below.
174*
175 DO 10 j = descz( n_ ), 1, -1
176 key( j ) = key( j-jz+1 ) + ( jz-1 )
177 10 CONTINUE
178*
179 DO 110 dist = 0, nprocs - 1
180*
181 sendto = mod( iam+dist, nprocs )
182 recvfrom = mod( nprocs+iam-dist, nprocs )
183*
184 sendrow = mod( sendto, nprow )
185 sendcol = sendto / nprow
186 recvrow = mod( recvfrom, nprow )
187 recvcol = recvfrom / nprow
188*
189 sendrow = sendto / npcol
190 sendcol = mod( sendto, npcol )
191 recvrow = recvfrom / npcol
192 recvcol = mod( recvfrom, npcol )
193*
194* Figure out what I have that process "sendto" wants
195*
196 nbufsize = 0
197*
198* We are looping through the eigenvectors that I presently own.
199*
200 DO 40 j = nvs( 1+iam ) + jz, nvs( 1+iam+1 ) + jz - 1
201 pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
202 $ npcol )
203 IF( sendcol.EQ.pcol ) THEN
204 minii = mod( sendrow+descz( rsrc_ ), nprow )*
205 $ descz( mb_ ) + 1
206 maxii = descz( m_ )
207 incii = descz( mb_ )*nprow
208 DO 30 ii = minii, maxii, incii
209 mini = max( ii, iz )
210 maxi = min( ii+descz( mb_ )-1, n+iz-1 )
211 DO 20 i = mini, maxi, 1
212 nbufsize = nbufsize + 1
213 rwork( nbufsize ) = zin( i+1-iz,
214 $ j-nvs( 1+iam )+1-jz )
215 20 CONTINUE
216 30 CONTINUE
217 END IF
218 40 CONTINUE
219*
220*
221 IF( myrow.NE.sendrow .OR. mycol.NE.sendcol )
222 $ CALL sgesd2d( descz( ctxt_ ), nbufsize, 1, rwork, nbufsize,
223 $ sendrow, sendcol )
224*
225*
226* Figure out what process "recvfrom" has that I want
227*
228 nbufsize = 0
229 DO 70 j = nvs( 1+recvfrom ) + jz,
230 $ nvs( 1+recvfrom+1 ) + jz - 1, 1
231 pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
232 $ npcol )
233 IF( mycol.EQ.pcol ) THEN
234 minii = mod( myrow+descz( rsrc_ ), nprow )*descz( mb_ ) +
235 $ 1
236 maxii = descz( m_ )
237 incii = descz( mb_ )*nprow
238 DO 60 ii = minii, maxii, incii
239 mini = max( ii, iz )
240 maxi = min( ii+nb-1, n+iz-1 )
241 DO 50 i = mini, maxi, 1
242 nbufsize = nbufsize + 1
243 50 CONTINUE
244 60 CONTINUE
245 END IF
246 70 CONTINUE
247*
248*
249*
250 IF( myrow.NE.recvrow .OR. mycol.NE.recvcol )
251 $ CALL sgerv2d( descz( ctxt_ ), 1, nbufsize, rwork, 1,
252 $ recvrow, recvcol )
253*
254 nbufsize = 0
255 DO 100 j = nvs( 1+recvfrom ) + jz,
256 $ nvs( 1+recvfrom+1 ) + jz - 1, 1
257 pcol = indxg2p( key( j ), descz( nb_ ), -1, descz( csrc_ ),
258 $ npcol )
259 IF( mycol.EQ.pcol ) THEN
260 cyclic_j = indxg2l( key( j ), descz( mb_ ), -1, -1,
261 $ npcol )
262 cyclic_i = 1
263 minii = mod( myrow+descz( rsrc_ ), nprow )*descz( mb_ ) +
264 $ 1
265 maxii = descz( m_ )
266 incii = descz( mb_ )*nprow
267 DO 90 ii = minii, maxii, incii
268 mini = max( ii, iz )
269 cyclic_i = indxg2l( mini, descz( mb_ ), -1, -1,
270 $ nprow )
271 maxi = min( ii+nb-1, n+iz-1 )
272 DO 80 i = mini, maxi, 1
273 nbufsize = nbufsize + 1
274 z( cyclic_i+( cyclic_j-1 )*descz( lld_ ) )
275 $ = cmplx( rwork( nbufsize ) )
276 cyclic_i = cyclic_i + 1
277 80 CONTINUE
278 90 CONTINUE
279 END IF
280 100 CONTINUE
281*
282 110 CONTINUE
283 RETURN
284*
285* End of PCLAEVSWP
286*
287 END
float cmplx[2]
Definition pblas.h:136
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pclaevswp(n, zin, ldzi, z, iz, jz, descz, nvs, key, rwork, lrwork)
Definition pclaevswp.f:5