ScaLAPACK 2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdlaevswp.f
*
*
      SUBROUTINE PDLAEVSWP( N, ZIN, LDZI, Z, IZ, JZ, DESCZ, NVS, KEY,
     $                      WORK, LWORK )
*
*  -- ScaLAPACK routine (version 1.7) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory,
*     and University of California, Berkeley.
*     April 15, 1997
*
*     .. Scalar Arguments ..
      INTEGER            IZ, JZ, LDZI, LWORK, N
*     ..
*     .. Array Arguments ..
      INTEGER            DESCZ( * ), KEY( * ), NVS( * )
      DOUBLE PRECISION   WORK( * ), Z( * ), ZIN( LDZI, * )
*     ..
*
*  Purpose
*  =======
*
*  PDLAEVSWP moves the eigenvectors (potentially unsorted) from
*  where they are computed into a standard ScaLAPACK block-cyclic
*  array, reordered so that the corresponding eigenvalues are in
*  sorted order.
*
*  Notes
*  =====
*
*  Each global data object is described by an associated description
*  vector.  This vector stores the information required to establish
*  the mapping between an object element and its corresponding process
*  and memory location.
*
*  Let A be a generic term for any 2D block-cyclically distributed
*  array.  Such a global array has an associated description vector
*  DESCA.  In the following comments, the character _ should be read
*  as "of the global array".
*
*  NOTATION        STORED IN      EXPLANATION
*  --------------- -------------- --------------------------------------
*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case,
*                                 DTYPE_A = 1.
*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
*                                 the BLACS process grid A is distribu-
*                                 ted over. The context itself is glo-
*                                 bal, but the handle (the integer
*                                 value) may vary.
*  M_A    (global) DESCA( M_ )    The number of rows in the global
*                                 array A.
*  N_A    (global) DESCA( N_ )    The number of columns in the global
*                                 array A.
*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute
*                                 the rows of the array.
*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute
*                                 the columns of the array.
*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
*                                 row of the array A is distributed.
*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the
*                                 first column of the array A is
*                                 distributed.
*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local
*                                 array.  LLD_A >= MAX(1,LOCr(M_A)).
*
*  Let K be the number of rows or columns of a distributed matrix,
*  and assume that its process grid has dimension p x q.
*  LOCr( K ) denotes the number of elements of K that a process
*  would receive if K were distributed over the p processes of its
*  process column.
*  Similarly, LOCc( K ) denotes the number of elements of K that a
*  process would receive if K were distributed over the q processes of
*  its process row.
*  The values of LOCr() and LOCc() may be determined via a call to the
*  ScaLAPACK tool function, NUMROC:
*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
*  An upper bound for these quantities may be computed by:
*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
*
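*  As an illustration only (not part of the original documentation),
*  the local row and column counts NP and NQ referred to in the
*  Arguments section below could be obtained from NUMROC applied to
*  the descriptor DESCZ, e.g.
*          NP = NUMROC( DESCZ( M_ ), DESCZ( MB_ ), MYROW,
*                       DESCZ( RSRC_ ), NPROW )
*          NQ = NUMROC( DESCZ( N_ ), DESCZ( NB_ ), MYCOL,
*                       DESCZ( CSRC_ ), NPCOL )
*  where MYROW, MYCOL, NPROW and NPCOL come from BLACS_GRIDINFO.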
*
*  Arguments
*  =========
*
*     NP = the number of rows local to a given process.
*     NQ = the number of columns local to a given process.
*
*  N       (global input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  ZIN     (local input) DOUBLE PRECISION array,
*          dimension ( LDZI, NVS(iam) )
*          The eigenvectors on input.  Each eigenvector resides entirely
*          in one process.  Each process holds a contiguous set of
*          NVS(iam) eigenvectors.  The first eigenvector which the
*          process holds is:  sum for i=[0,iam-1) of NVS(i)
*
*  LDZI    (local input) INTEGER
*          The leading dimension of the array ZIN.
*
*  Z       (local output) DOUBLE PRECISION array
*          global dimension (N, N), local dimension (DESCZ(LLD_), NQ)
*          The eigenvectors on output.  The eigenvectors are distributed
*          in a block cyclic manner in both dimensions, with a
*          block size of NB.
*
*  IZ      (global input) INTEGER
*          Z's global row index, which points to the beginning of the
*          submatrix which is to be operated on.
*
*  JZ      (global input) INTEGER
*          Z's global column index, which points to the beginning of
*          the submatrix which is to be operated on.
*
*  DESCZ   (global and local input) INTEGER array of dimension DLEN_.
*          The array descriptor for the distributed matrix Z.
*
*  NVS     (global input) INTEGER array, dimension( nprocs+1 )
*          NVS(i) = cumulative number of eigenvectors held by the
*          processes [0,i-1), i.e.
*          NVS(1) = number of eigenvectors held by [0,1-1) == 0
*          NVS(nprocs+1) = number of eigenvectors held by [0,nprocs)
*                        == total number of eigenvectors
*
*  KEY     (global input) INTEGER array, dimension( N )
*          Indicates the actual index (after sorting) for each of the
*          eigenvectors.
*
*  WORK    (local workspace) DOUBLE PRECISION array, dimension (LWORK)
*
*  LWORK   (local input) INTEGER
*          The dimension of the array WORK.
*
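*  For illustration only (these values are not from the original
*  documentation): with nprocs = 2 and N = 4 eigenvectors split evenly
*  between the two processes, one would have
*          NVS = ( 0, 2, 4 )
*  so that process 0 holds input eigenvectors 1:2 and process 1 holds
*  input eigenvectors 3:4, while KEY(j) gives the sorted position in Z
*  at which input eigenvector j is finally stored.
*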
*     .. Parameters ..
      INTEGER            BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
     $                   MB_, NB_, RSRC_, CSRC_, LLD_
      PARAMETER          ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DTYPE_ = 1,
     $                   CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
     $                   RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
*     ..
*     .. Local Scalars ..
      INTEGER            CYCLIC_I, CYCLIC_J, DIST, I, IAM, II, INCII, J,
     $                   MAXI, MAXII, MINI, MINII, MYCOL, MYROW, NB,
     $                   NBUFSIZE, NPCOL, NPROCS, NPROW, PCOL, RECVCOL,
     $                   RECVFROM, RECVROW, SENDCOL, SENDROW, SENDTO
*     ..
*     .. External Functions ..
      INTEGER            INDXG2L, INDXG2P
      EXTERNAL           INDXG2L, INDXG2P
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, DGERV2D, DGESD2D
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*       This is just to keep ftnchek happy
      IF( BLOCK_CYCLIC_2D*CSRC_*CTXT_*DLEN_*DTYPE_*LLD_*MB_*M_*NB_*N_*
     $    RSRC_.LT.0 )RETURN
      CALL BLACS_GRIDINFO( DESCZ( CTXT_ ), NPROW, NPCOL, MYROW, MYCOL )
      IAM = MYROW + MYCOL*NPROW
      IAM = MYROW*NPCOL + MYCOL
*
      NB = DESCZ( MB_ )
*
      NPROCS = NPROW*NPCOL
*
*     If PxSTEIN operates on a sub-matrix of a global matrix, the
*     array KEY, which contains the indices of the eigenvectors, is
*     referenced to the dimensions of the sub-matrix and not to the
*     global distributed matrix.  Because of this, PxLAEVSWP would
*     incorrectly map the eigenvectors to the global eigenvector
*     matrix, Z, unless the KEY elements are shifted as below.
*
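*     For example (illustration only): if the sub-matrix starts at
*     global column JZ = 3, every KEY entry is increased by JZ-1 = 2,
*     so a sub-matrix index of 1 maps to global column 3 of Z.
*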
      DO 10 J = DESCZ( N_ ), 1, -1
         KEY( J ) = KEY( J-JZ+1 ) + ( JZ-1 )
   10 CONTINUE
*
      DO 110 DIST = 0, NPROCS - 1
*
         SENDTO = MOD( IAM+DIST, NPROCS )
         RECVFROM = MOD( NPROCS+IAM-DIST, NPROCS )
*
         SENDROW = MOD( SENDTO, NPROW )
         SENDCOL = SENDTO / NPROW
         RECVROW = MOD( RECVFROM, NPROW )
         RECVCOL = RECVFROM / NPROW
*
         SENDROW = SENDTO / NPCOL
         SENDCOL = MOD( SENDTO, NPCOL )
         RECVROW = RECVFROM / NPCOL
         RECVCOL = MOD( RECVFROM, NPCOL )
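*
*        The coordinates are recomputed just above assuming row-major
*        process numbering ( IAM = MYROW*NPCOL + MYCOL ); these
*        row-major values overwrite the column-major ones and are the
*        values actually used below.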
*
*        Figure out what I have that process "sendto" wants
*
         NBUFSIZE = 0
*
*        We are looping through the eigenvectors that I presently own.
*
         DO 40 J = NVS( 1+IAM ) + JZ, NVS( 1+IAM+1 ) + JZ - 1
            PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ),
     $                      NPCOL )
            IF( SENDCOL.EQ.PCOL ) THEN
               MINII = MOD( SENDROW+DESCZ( RSRC_ ), NPROW )*
     $                 DESCZ( MB_ ) + 1
               MAXII = DESCZ( M_ )
               INCII = DESCZ( MB_ )*NPROW
               DO 30 II = MINII, MAXII, INCII
                  MINI = MAX( II, IZ )
                  MAXI = MIN( II+DESCZ( MB_ )-1, N+IZ-1 )
                  DO 20 I = MINI, MAXI, 1
                     NBUFSIZE = NBUFSIZE + 1
                     WORK( NBUFSIZE ) = ZIN( I+1-IZ,
     $                                  J-NVS( 1+IAM )+1-JZ )
   20             CONTINUE
   30          CONTINUE
            END IF
   40    CONTINUE
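*
*        WORK now holds, packed column by column, the entries of my
*        local eigenvectors that belong to process (SENDROW,SENDCOL).
*        When that process is this process itself, no message is sent
*        and the packed data are simply reused from WORK below.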
*
         IF( MYROW.NE.SENDROW .OR. MYCOL.NE.SENDCOL )
     $      CALL DGESD2D( DESCZ( CTXT_ ), NBUFSIZE, 1, WORK, NBUFSIZE,
     $                    SENDROW, SENDCOL )
*
*        Figure out what process "recvfrom" has that I want
*
         NBUFSIZE = 0
         DO 70 J = NVS( 1+RECVFROM ) + JZ,
     $           NVS( 1+RECVFROM+1 ) + JZ - 1, 1
            PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ),
     $                      NPCOL )
            IF( MYCOL.EQ.PCOL ) THEN
               MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) +
     $                 1
               MAXII = DESCZ( M_ )
               INCII = DESCZ( MB_ )*NPROW
               DO 60 II = MINII, MAXII, INCII
                  MINI = MAX( II, IZ )
                  MAXI = MIN( II+NB-1, N+IZ-1 )
                  DO 50 I = MINI, MAXI, 1
                     NBUFSIZE = NBUFSIZE + 1
   50             CONTINUE
   60          CONTINUE
            END IF
   70    CONTINUE
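*
*        The loop above only counts how many entries this process will
*        receive from process RECVFROM, so that the receive below knows
*        the length of the incoming buffer.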
*
         IF( MYROW.NE.RECVROW .OR. MYCOL.NE.RECVCOL )
     $      CALL DGERV2D( DESCZ( CTXT_ ), 1, NBUFSIZE, WORK, 1, RECVROW,
     $                    RECVCOL )
*
         NBUFSIZE = 0
         DO 100 J = NVS( 1+RECVFROM ) + JZ,
     $           NVS( 1+RECVFROM+1 ) + JZ - 1, 1
            PCOL = INDXG2P( KEY( J ), DESCZ( NB_ ), -1, DESCZ( CSRC_ ),
     $                      NPCOL )
            IF( MYCOL.EQ.PCOL ) THEN
               CYCLIC_J = INDXG2L( KEY( J ), DESCZ( MB_ ), -1, -1,
     $                             NPCOL )
               CYCLIC_I = 1
               MINII = MOD( MYROW+DESCZ( RSRC_ ), NPROW )*DESCZ( MB_ ) +
     $                 1
               MAXII = DESCZ( M_ )
               INCII = DESCZ( MB_ )*NPROW
               DO 90 II = MINII, MAXII, INCII
                  MINI = MAX( II, IZ )
                  CYCLIC_I = INDXG2L( MINI, DESCZ( MB_ ), -1, -1,
     $                                NPROW )
                  MAXI = MIN( II+NB-1, N+IZ-1 )
                  DO 80 I = MINI, MAXI, 1
                     NBUFSIZE = NBUFSIZE + 1
                     Z( CYCLIC_I+( CYCLIC_J-1 )*DESCZ( LLD_ ) )
     $                  = WORK( NBUFSIZE )
                     CYCLIC_I = CYCLIC_I + 1
   80             CONTINUE
   90          CONTINUE
            END IF
  100    CONTINUE
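*
*        The received (or locally retained) buffer has been unpacked
*        into the local part of Z: INDXG2L converts the global row
*        index and the sorted global column index KEY( J ) into local
*        indices of the block-cyclically distributed array.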
*
  110 CONTINUE
      RETURN
*
*     End of PDLAEVSWP
*
      END