ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcpoequ.f
Go to the documentation of this file.
1  SUBROUTINE pcpoequ( N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX,
2  $ INFO )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, INFO, JA, N
11  REAL AMAX, SCOND
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * )
15  REAL SC( * ), SR( * )
16  COMPLEX A( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * PCPOEQU computes row and column scalings intended to
23 * equilibrate a distributed Hermitian positive definite matrix
24 * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) and reduce its condition number
25 * (with respect to the two-norm). SR and SC contain the scale
26 * factors, S(i) = 1/sqrt(A(i,i)), chosen so that the scaled distri-
27 * buted matrix B with elements B(i,j) = S(i)*A(i,j)*S(j) has ones on
28 * the diagonal. This choice of SR and SC puts the condition number
29 * of B within a factor N of the smallest possible condition number
30 * over all possible diagonal scalings.
31 *
32 * The scaling factor are stored along process rows in SR and along
33 * process columns in SC. The duplication of information simplifies
34 * greatly the application of the factors.
35 *
36 * Notes
37 * =====
38 *
39 * Each global data object is described by an associated description
40 * vector. This vector stores the information required to establish
41 * the mapping between an object element and its corresponding process
42 * and memory location.
43 *
44 * Let A be a generic term for any 2D block cyclicly distributed array.
45 * Such a global array has an associated description vector DESCA.
46 * In the following comments, the character _ should be read as
47 * "of the global array".
48 *
49 * NOTATION STORED IN EXPLANATION
50 * --------------- -------------- --------------------------------------
51 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
52 * DTYPE_A = 1.
53 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
54 * the BLACS process grid A is distribu-
55 * ted over. The context itself is glo-
56 * bal, but the handle (the integer
57 * value) may vary.
58 * M_A (global) DESCA( M_ ) The number of rows in the global
59 * array A.
60 * N_A (global) DESCA( N_ ) The number of columns in the global
61 * array A.
62 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
63 * the rows of the array.
64 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
65 * the columns of the array.
66 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
67 * row of the array A is distributed.
68 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
69 * first column of the array A is
70 * distributed.
71 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
72 * array. LLD_A >= MAX(1,LOCr(M_A)).
73 *
74 * Let K be the number of rows or columns of a distributed matrix,
75 * and assume that its process grid has dimension p x q.
76 * LOCr( K ) denotes the number of elements of K that a process
77 * would receive if K were distributed over the p processes of its
78 * process column.
79 * Similarly, LOCc( K ) denotes the number of elements of K that a
80 * process would receive if K were distributed over the q processes of
81 * its process row.
82 * The values of LOCr() and LOCc() may be determined via a call to the
83 * ScaLAPACK tool function, NUMROC:
84 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
85 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
86 * An upper bound for these quantities may be computed by:
87 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
88 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
89 *
90 * Arguments
91 * =========
92 *
93 * N (global input) INTEGER
94 * The number of rows and columns to be operated on i.e the
95 * order of the distributed submatrix sub( A ). N >= 0.
96 *
97 * A (local input) COMPLEX pointer into the local memory to an
98 * array of local dimension ( LLD_A, LOCc(JA+N-1) ), the
99 * N-by-N Hermitian positive definite distributed matrix
100 * sub( A ) whose scaling factors are to be computed. Only the
101 * diagonal elements of sub( A ) are referenced.
102 *
103 * IA (global input) INTEGER
104 * The row index in the global array A indicating the first
105 * row of sub( A ).
106 *
107 * JA (global input) INTEGER
108 * The column index in the global array A indicating the
109 * first column of sub( A ).
110 *
111 * DESCA (global and local input) INTEGER array of dimension DLEN_.
112 * The array descriptor for the distributed matrix A.
113 *
114 * SR (local output) REAL array, dimension LOCr(M_A)
115 * If INFO = 0, SR(IA:IA+N-1) contains the row scale factors
116 * for sub( A ). SR is aligned with the distributed matrix A,
117 * and replicated across every process column. SR is tied to the
118 * distributed matrix A.
119 *
120 * SC (local output) REAL array, dimension LOCc(N_A)
121 * If INFO = 0, SC(JA:JA+N-1) contains the column scale factors
122 * for A(IA:IA+M-1,JA:JA+N-1). SC is aligned with the distribu-
123 * ted matrix A, and replicated down every process row. SC is
124 * tied to the distributed matrix A.
125 *
126 * SCOND (global output) REAL
127 * If INFO = 0, SCOND contains the ratio of the smallest SR(i)
128 * (or SC(j)) to the largest SR(i) (or SC(j)), with
129 * IA <= i <= IA+N-1 and JA <= j <= JA+N-1. If SCOND >= 0.1
130 * and AMAX is neither too large nor too small, it is not worth
131 * scaling by SR (or SC).
132 *
133 * AMAX (global output) REAL
134 * Absolute value of largest matrix element. If AMAX is very
135 * close to overflow or very close to underflow, the matrix
136 * should be scaled.
137 *
138 * INFO (global output) INTEGER
139 * = 0: successful exit
140 * < 0: If the i-th argument is an array and the j-entry had
141 * an illegal value, then INFO = -(i*100+j), if the i-th
142 * argument is a scalar and had an illegal value, then
143 * INFO = -i.
144 * > 0: If INFO = K, the K-th diagonal entry of sub( A ) is
145 * nonpositive.
146 *
147 * =====================================================================
148 *
149 * .. Parameters ..
150  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
151  $ lld_, mb_, m_, nb_, n_, rsrc_
152  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
153  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
154  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
155  REAL ZERO, ONE
156  parameter( zero = 0.0e+0, one = 1.0e+0 )
157 * ..
158 * .. Local Scalars ..
159  CHARACTER ALLCTOP, COLCTOP, ROWCTOP
160  INTEGER IACOL, IAROW, ICOFF, ICTXT, ICURCOL, ICURROW,
161  $ idumm, ii, iia, ioffa, ioffd, iroff, j, jb, jj,
162  $ jja, jn, lda, ll, mycol, myrow, np, npcol,
163  $ nprow, nq
164  REAL AII, SMIN
165 * ..
166 * .. Local Arrays ..
167  INTEGER DESCSC( DLEN_ ), DESCSR( DLEN_ )
168 * ..
169 * .. External Subroutines ..
170  EXTERNAL blacs_gridinfo, chk1mat, descset, igamn2d,
171  $ infog2l, pchk1mat, pb_topget, pxerbla,
172  $ sgamn2d, sgamx2d, sgsum2d
173 * ..
174 * .. External Functions ..
175  INTEGER ICEIL, NUMROC
176  REAL PSLAMCH
177  EXTERNAL iceil, numroc, pslamch
178 * ..
179 * .. Intrinsic Functions ..
180  INTRINSIC max, min, mod, real, sqrt
181 * ..
182 * .. Executable Statements ..
183 *
184 * Get grid parameters
185 *
186  ictxt = desca( ctxt_ )
187  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
188 *
189 * Test the input parameters.
190 *
191  info = 0
192  IF( nprow.EQ.-1 ) THEN
193  info = -(500+ctxt_)
194  ELSE
195  CALL chk1mat( n, 1, n, 1, ia, ja, desca, 5, info )
196  CALL pchk1mat( n, 1, n, 1, ia, ja, desca, 5, 0, idumm, idumm,
197  $ info )
198  END IF
199 *
200  IF( info.NE.0 ) THEN
201  CALL pxerbla( ictxt, 'PCPOEQU', -info )
202  RETURN
203  END IF
204 *
205 * Quick return if possible
206 *
207  IF( n.EQ.0 ) THEN
208  scond = one
209  amax = zero
210  RETURN
211  END IF
212 *
213  CALL pb_topget( ictxt, 'Combine', 'All', allctop )
214  CALL pb_topget( ictxt, 'Combine', 'Rowwise', rowctop )
215  CALL pb_topget( ictxt, 'Combine', 'Columnwise', colctop )
216 *
217 * Compute some local indexes
218 *
219  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
220  $ iarow, iacol )
221  iroff = mod( ia-1, desca( mb_ ) )
222  icoff = mod( ja-1, desca( nb_ ) )
223  np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
224  nq = numroc( n+icoff, desca( nb_ ), mycol, iacol, npcol )
225  IF( myrow.EQ.iarow )
226  $ np = np - iroff
227  IF( mycol.EQ.iacol )
228  $ nq = nq - icoff
229  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
230  lda = desca( lld_ )
231 *
232 * Assign descriptors for SR and SC arrays
233 *
234  CALL descset( descsr, n, 1, desca( mb_ ), 1, 0, 0, ictxt,
235  $ max( 1, np ) )
236  CALL descset( descsc, 1, n, 1, desca( nb_ ), 0, 0, ictxt, 1 )
237 *
238 * Initialize the scaling factors to zero.
239 *
240  DO 10 ii = iia, iia+np-1
241  sr( ii ) = zero
242  10 CONTINUE
243 *
244  DO 20 jj = jja, jja+nq-1
245  sc( jj ) = zero
246  20 CONTINUE
247 *
248 * Find the minimum and maximum diagonal elements.
249 * Handle first block separately.
250 *
251  ii = iia
252  jj = jja
253  jb = jn-ja+1
254  smin = one / pslamch( ictxt, 'S' )
255  amax = zero
256 *
257  ioffa = ii+(jj-1)*lda
258  IF( myrow.EQ.iarow .AND. mycol.EQ.iacol ) THEN
259  ioffd = ioffa
260  DO 30 ll = 0, jb-1
261  aii = real( a( ioffd ) )
262  sr( ii+ll ) = aii
263  sc( jj+ll ) = aii
264  smin = min( smin, aii )
265  amax = max( amax, aii )
266  IF( aii.LE.zero .AND. info.EQ.0 )
267  $ info = ll + 1
268  ioffd = ioffd + lda + 1
269  30 CONTINUE
270  END IF
271 *
272  IF( myrow.EQ.iarow ) THEN
273  ii = ii + jb
274  ioffa = ioffa + jb
275  END IF
276  IF( mycol.EQ.iacol ) THEN
277  jj = jj + jb
278  ioffa = ioffa + jb*lda
279  END IF
280  icurrow = mod( iarow+1, nprow )
281  icurcol = mod( iacol+1, npcol )
282 *
283 * Loop over remaining blocks of columns
284 *
285  DO 50 j = jn+1, ja+n-1, desca( nb_ )
286  jb = min( n-j+ja, desca( nb_ ) )
287 *
288  IF( myrow.EQ.icurrow .AND. mycol.EQ.icurcol ) THEN
289  ioffd = ioffa
290  DO 40 ll = 0, jb-1
291  aii = real( a( ioffd ) )
292  sr( ii+ll ) = aii
293  sc( jj+ll ) = aii
294  smin = min( smin, aii )
295  amax = max( amax, aii )
296  IF( aii.LE.zero .AND. info.EQ.0 )
297  $ info = j + ll - ja + 1
298  ioffd = ioffd + lda + 1
299  40 CONTINUE
300  END IF
301 *
302  IF( myrow.EQ.icurrow ) THEN
303  ii = ii + jb
304  ioffa = ioffa + jb
305  END IF
306  IF( mycol.EQ.icurcol ) THEN
307  jj = jj + jb
308  ioffa = ioffa + jb*lda
309  END IF
310  icurrow = mod( icurrow+1, nprow )
311  icurcol = mod( icurcol+1, npcol )
312 *
313  50 CONTINUE
314 *
315 * Compute scaling factors
316 *
317  CALL sgsum2d( ictxt, 'Columnwise', colctop, 1, nq, sc( jja ),
318  $ 1, -1, mycol )
319  CALL sgsum2d( ictxt, 'Rowwise', rowctop, np, 1, sr( iia ),
320  $ max( 1, np ), -1, mycol )
321 *
322  CALL sgamx2d( ictxt, 'All', allctop, 1, 1, amax, 1, idumm, idumm,
323  $ -1, -1, mycol )
324  CALL sgamn2d( ictxt, 'All', allctop, 1, 1, smin, 1, idumm, idumm,
325  $ -1, -1, mycol )
326 *
327  IF( smin.LE.zero ) THEN
328 *
329 * Find the first non-positive diagonal element and return.
330 *
331  CALL igamn2d( ictxt, 'All', allctop, 1, 1, info, 1, ii, jj, -1,
332  $ -1, mycol )
333  RETURN
334 *
335  ELSE
336 *
337 * Set the scale factors to the reciprocals
338 * of the diagonal elements.
339 *
340  DO 60 ii = iia, iia+np-1
341  sr( ii ) = one / sqrt( sr( ii ) )
342  60 CONTINUE
343 *
344  DO 70 jj = jja, jja+nq-1
345  sc( jj ) = one / sqrt( sc( jj ) )
346  70 CONTINUE
347 *
348 * Compute SCOND = min(S(I)) / max(S(I))
349 *
350  scond = sqrt( smin ) / sqrt( amax )
351 *
352  END IF
353 *
354  RETURN
355 *
356 * End of PCPOEQU
357 *
358  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pcpoequ
subroutine pcpoequ(N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, INFO)
Definition: pcpoequ.f:3
pchk1mat
subroutine pchk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181