ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pclapiv.f
Go to the documentation of this file.
1  SUBROUTINE pclapiv( DIREC, ROWCOL, PIVROC, M, N, A, IA, JA,
2  $ DESCA, IPIV, IP, JP, DESCIP, IWORK )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * November 15, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER*1 DIREC, PIVROC, ROWCOL
11  INTEGER IA, IP, JA, JP, M, N
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCIP( * ), IPIV( * ), IWORK( * )
15  COMPLEX A( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PCLAPIV applies either P (permutation matrix indicated by IPIV)
22 * or inv( P ) to a general M-by-N distributed matrix
23 * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), resulting in row or column
24 * pivoting. The pivot vector may be distributed across a process row
25 * or a column. The pivot vector should be aligned with the distributed
26 * matrix A. This routine will transpose the pivot vector if necessary.
27 * For example if the row pivots should be applied to the columns of
28 * sub( A ), pass ROWCOL='C' and PIVROC='C'.
29 *
30 * Notes
31 * =====
32 *
33 * Each global data object is described by an associated description
34 * vector. This vector stores the information required to establish
35 * the mapping between an object element and its corresponding process
36 * and memory location.
37 *
38 * Let A be a generic term for any 2D block cyclicly distributed array.
39 * Such a global array has an associated description vector DESCA.
40 * In the following comments, the character _ should be read as
41 * "of the global array".
42 *
43 * NOTATION STORED IN EXPLANATION
44 * --------------- -------------- --------------------------------------
45 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46 * DTYPE_A = 1.
47 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48 * the BLACS process grid A is distribu-
49 * ted over. The context itself is glo-
50 * bal, but the handle (the integer
51 * value) may vary.
52 * M_A (global) DESCA( M_ ) The number of rows in the global
53 * array A.
54 * N_A (global) DESCA( N_ ) The number of columns in the global
55 * array A.
56 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57 * the rows of the array.
58 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59 * the columns of the array.
60 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61 * row of the array A is distributed.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of the array A is
64 * distributed.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array. LLD_A >= MAX(1,LOCr(M_A)).
67 *
68 * Let K be the number of rows or columns of a distributed matrix,
69 * and assume that its process grid has dimension p x q.
70 * LOCr( K ) denotes the number of elements of K that a process
71 * would receive if K were distributed over the p processes of its
72 * process column.
73 * Similarly, LOCc( K ) denotes the number of elements of K that a
74 * process would receive if K were distributed over the q processes of
75 * its process row.
76 * The values of LOCr() and LOCc() may be determined via a call to the
77 * ScaLAPACK tool function, NUMROC:
78 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80 * An upper bound for these quantities may be computed by:
81 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83 *
84 * Restrictions
85 * ============
86 *
87 * IPIV must always be a distributed vector (not a matrix). Thus:
88 * IF( ROWPIV .EQ. 'C' ) THEN
89 * JP must be 1
90 * ELSE
91 * IP must be 1
92 * END IF
93 *
94 * The following restrictions apply when IPIV must be transposed:
95 * IF( ROWPIV.EQ.'C' .AND. PIVROC.EQ.'C') THEN
96 * DESCIP(MB_) must equal DESCA(NB_)
97 * ELSE IF( ROWPIV.EQ.'R" .AND. PIVROC.EQ.'R') THEN
98 * DESCIP(NB_) must equal DESCA(MB_)
99 * END IF
100 *
101 * Arguments
102 * =========
103 *
104 * DIREC (global input) CHARACTER*1
105 * Specifies in which order the permutation is applied:
106 * = 'F' (Forward) Applies pivots Forward from top of matrix.
107 * Computes P*sub( A ).
108 * = 'B' (Backward) Applies pivots Backward from bottom of
109 * matrix. Computes inv( P )*sub( A ).
110 *
111 * ROWCOL (global input) CHARACTER*1
112 * Specifies if the rows or columns are to be permuted:
113 * = 'R' Rows will be permuted,
114 * = 'C' Columns will be permuted.
115 *
116 * PIVROC (global input) CHARACTER*1
117 * Specifies whether IPIV is distributed over a process row
118 * or column:
119 * = 'R' IPIV distributed over a process row
120 * = 'C' IPIV distributed over a process column
121 *
122 * M (global input) INTEGER
123 * The number of rows to be operated on, i.e. the number of
124 * rows of the distributed submatrix sub( A ). M >= 0.
125 *
126 * N (global input) INTEGER
127 * The number of columns to be operated on, i.e. the number of
128 * columns of the distributed submatrix sub( A ). N >= 0.
129 *
130 * A (local input/local output) COMPLEX pointer into the
131 * local memory to an array of dimension (LLD_A, LOCc(JA+N-1)).
132 * On entry, this array contains the local pieces of the
133 * distributed submatrix sub( A ) to which the row or column
134 * interchanges will be applied. On exit, the local pieces
135 * of the permuted distributed submatrix.
136 *
137 * IA (global input) INTEGER
138 * The row index in the global array A indicating the first
139 * row of sub( A ).
140 *
141 * JA (global input) INTEGER
142 * The column index in the global array A indicating the
143 * first column of sub( A ).
144 *
145 * DESCA (global and local input) INTEGER array of dimension DLEN_.
146 * The array descriptor for the distributed matrix A.
147 *
148 * IPIV (local input) INTEGER array, dimension (LIPIV) where LIPIV is
149 * when ROWCOL='R' or 'r':
150 * >= LOCr( IA+M-1 ) + MB_A if PIVROC='C' or 'c',
151 * >= LOCc( M + MOD(JP-1,NB_P) ) if PIVROC='R' or 'r', and,
152 * when ROWCOL='C' or 'c':
153 * >= LOCr( N + MOD(IP-1,MB_P) ) if PIVROC='C' or 'c',
154 * >= LOCc( JA+N-1 ) + NB_A if PIVROC='R' or 'r'.
155 * This array contains the pivoting information. IPIV(i) is the
156 * global row (column), local row (column) i was swapped with.
157 * When ROWCOL='R' or 'r' and PIVROC='C' or 'c', or ROWCOL='C'
158 * or 'c' and PIVROC='R' or 'r', the last piece of this array of
159 * size MB_A (resp. NB_A) is used as workspace. In those cases,
160 * this array is tied to the distributed matrix A.
161 *
162 * IP (global input) INTEGER
163 * The row index in the global array P indicating the first
164 * row of sub( P ).
165 *
166 * JP (global input) INTEGER
167 * The column index in the global array P indicating the
168 * first column of sub( P ).
169 *
170 * DESCIP (global and local input) INTEGER array of dimension DLEN_.
171 * The array descriptor for the distributed vector IPIV.
172 *
173 * IWORK (local workspace) INTEGER array, dimension (LDW)
174 * where LDW is equal to the workspace necessary for
175 * transposition, and the storage of the tranposed IPIV:
176 *
177 * Let LCM be the least common multiple of NPROW and NPCOL.
178 * IF( ROWCOL.EQ.'R' .AND. PIVROC.EQ.'R' ) THEN
179 * IF( NPROW.EQ.NPCOL ) THEN
180 * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) + NB_P
181 * ELSE
182 * LDW = LOCr( N_P + MOD(JP-1, NB_P) ) +
183 * NB_P * CEIL( CEIL(LOCc(N_P)/NB_P) / (LCM/NPCOL) )
184 * END IF
185 * ELSE IF( ROWCOL.EQ.'C' .AND. PIVROC.EQ.'C' ) THEN
186 * IF( NPROW.EQ.NPCOL ) THEN
187 * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) + MB_P
188 * ELSE
189 * LDW = LOCc( M_P + MOD(IP-1, MB_P) ) +
190 * MB_P * CEIL( CEIL(LOCr(M_P)/MB_P) / (LCM/NPROW) )
191 * END IF
192 * ELSE
193 * IWORK is not referenced.
194 * END IF
195 *
196 * =====================================================================
197 *
198 * .. Parameters ..
199  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
200  $ lld_, mb_, m_, nb_, n_, rsrc_
201  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
202  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
203  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
204 * ..
205 * .. Local Scalars ..
206  LOGICAL ROWPVT
207  INTEGER I, ICTXT, ICURCOL, ICURROW, IIP, ITMP, IPT,
208  $ jjp, jpt, mycol, myrow, npcol, nprow
209 * ..
210 * .. Local Arrays ..
211  INTEGER DESCPT( DLEN_ )
212 * ..
213 * .. External Subroutines ..
214  EXTERNAL blacs_gridinfo, igebr2d, igebs2d,
216 * ..
217 * .. External Functions ..
218  LOGICAL LSAME
219  INTEGER NUMROC, INDXG2P
220  EXTERNAL lsame, numroc, indxg2p
221 * ..
222 * .. Intrinsic Functions ..
223  INTRINSIC max, mod
224 * ..
225 * .. Executable Statements ..
226 *
227 * Get grid parameters
228 *
229  ictxt = desca( ctxt_ )
230  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
231  rowpvt = lsame( rowcol, 'R' )
232 *
233 * If we're pivoting the rows of sub( A )
234 *
235  IF( rowpvt ) THEN
236  IF( m.LE.1 .OR. n.LT.1 )
237  $ RETURN
238 *
239 * If the pivot vector is already distributed correctly
240 *
241  IF( lsame( pivroc, 'C' ) ) THEN
242  CALL pclapv2( direc, rowcol, m, n, a, ia, ja, desca, ipiv,
243  $ ip, jp, descip )
244 *
245 * Otherwise, we must redistribute IPIV to match PCLAPV2
246 *
247  ELSE
248 *
249 * Take IPIV distributed over row 0, and store it in
250 * iwork, distributed over column 0
251 *
252  ipt = mod( jp-1, desca(mb_) )
253  descpt(m_) = m + ipt + nprow*desca(mb_)
254  descpt(n_) = 1
255  descpt(mb_) = desca(mb_)
256  descpt(nb_) = 1
257  descpt(rsrc_) = indxg2p( ia, desca(mb_), ia, desca(rsrc_),
258  $ nprow )
259  descpt(csrc_) = mycol
260  descpt(ctxt_) = ictxt
261  descpt(lld_) = numroc( descpt(m_), descpt(mb_), myrow,
262  $ descpt(rsrc_), nprow )
263  itmp = numroc( descip(n_), descip(nb_), mycol,
264  $ descip(csrc_), npcol )
265  CALL infog2l( ip, jp-ipt, descip, nprow, npcol, myrow,
266  $ mycol, iip, jjp, icurrow, icurcol )
267  CALL pirow2col( ictxt, m+ipt, 1, descip(nb_), ipiv(jjp),
268  $ itmp, iwork, descpt(lld_), 0, icurcol,
269  $ descpt(rsrc_),
270  $ mycol, iwork(descpt(lld_)-descpt(mb_)+1) )
271 *
272 * Send column-distributed pivots to all columns
273 *
274  itmp = descpt(lld_) - descpt(mb_)
275  IF( mycol.EQ.0 ) THEN
276  CALL igebs2d( ictxt, 'Row', ' ', itmp, 1, iwork, itmp )
277  ELSE
278  CALL igebr2d( ictxt, 'Row', ' ', itmp, 1, iwork, itmp,
279  $ myrow, 0 )
280  END IF
281 *
282 * Adjust pivots so they are relative to the start of IWORK,
283 * not IPIV
284 *
285  ipt = ipt + 1
286  DO 10 i = 1, itmp
287  iwork(i) = iwork(i) - jp + ipt
288  10 CONTINUE
289  CALL pclapv2( direc, rowcol, m, n, a, ia, ja, desca, iwork,
290  $ ipt, 1, descpt )
291  END IF
292 *
293 * Otherwise, we're pivoting the columns of sub( A )
294 *
295  ELSE
296  IF( m.LT.1 .OR. n.LE.1 )
297  $ RETURN
298 *
299 * If the pivot vector is already distributed correctly
300 *
301  IF( lsame( pivroc, 'R' ) ) THEN
302  CALL pclapv2( direc, rowcol, m, n, a, ia, ja, desca, ipiv,
303  $ ip, jp, descip )
304 *
305 * Otherwise, we must redistribute IPIV to match PCLAPV2
306 *
307  ELSE
308 *
309 * Take IPIV distributed over column 0, and store it in
310 * iwork, distributed over row 0
311 *
312  jpt = mod( ip-1, desca(nb_) )
313  descpt(m_) = 1
314  descpt(n_) = n + jpt + npcol*desca(nb_)
315  descpt(mb_) = 1
316  descpt(nb_) = desca(nb_)
317  descpt(rsrc_) = myrow
318  descpt(csrc_) = indxg2p( ja, desca(nb_), ja, desca(csrc_),
319  $ npcol )
320  descpt(ctxt_) = ictxt
321  descpt(lld_) = 1
322  CALL infog2l( ip-jpt, jp, descip, nprow, npcol, myrow,
323  $ mycol, iip, jjp, icurrow, icurcol )
324  itmp = numroc( n+jpt, descpt(nb_), mycol, descpt(csrc_),
325  $ npcol )
326  CALL picol2row( ictxt, n+jpt, 1, descip(mb_), ipiv(iip),
327  $ descip(lld_), iwork, max(1, itmp), icurrow,
328  $ 0, 0, descpt(csrc_), iwork(itmp+1) )
329 *
330 * Send row-distributed pivots to all rows
331 *
332  IF( myrow.EQ.0 ) THEN
333  CALL igebs2d( ictxt, 'Column', ' ', itmp, 1, iwork,
334  $ itmp )
335  ELSE
336  CALL igebr2d( ictxt, 'Column', ' ', itmp, 1, iwork,
337  $ itmp, 0, mycol )
338  END IF
339 *
340 * Adjust pivots so they are relative to the start of IWORK,
341 * not IPIV
342 *
343  jpt = jpt + 1
344  DO 20 i = 1, itmp
345  iwork(i) = iwork(i) - ip + jpt
346  20 CONTINUE
347  CALL pclapv2( direc, rowcol, m, n, a, ia, ja, desca, iwork,
348  $ 1, jpt, descpt )
349  END IF
350  END IF
351 *
352  RETURN
353 *
354 * End of PCLAPIV
355 *
356  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pclapv2
subroutine pclapv2(DIREC, ROWCOL, M, N, A, IA, JA, DESCA, IPIV, IP, JP, DESCIP)
Definition: pclapv2.f:3
picol2row
subroutine picol2row(ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, CSRC, RDEST, CDEST, WORK)
Definition: picol2row.f:3
pclapiv
subroutine pclapiv(DIREC, ROWCOL, PIVROC, M, N, A, IA, JA, DESCA, IPIV, IP, JP, DESCIP, IWORK)
Definition: pclapiv.f:3
pirow2col
subroutine pirow2col(ICTXT, M, N, NB, VS, LDVS, VD, LDVD, RSRC, CSRC, RDEST, CDEST, WORK)
Definition: pirow2col.f:3