ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslaqsy.f
Go to the documentation of this file.
1  SUBROUTINE pslaqsy( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND,
2  $ AMAX, EQUED )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER EQUED, UPLO
11  INTEGER IA, JA, N
12  REAL AMAX, SCOND
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCA( * )
16  REAL A( * ), SC( * ), SR( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * PSLAQSY equilibrates a symmetric distributed matrix
23 * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the
24 * vectors SR and SC.
25 *
26 * Notes
27 * =====
28 *
29 * Each global data object is described by an associated description
30 * vector. This vector stores the information required to establish
31 * the mapping between an object element and its corresponding process
32 * and memory location.
33 *
34 * Let A be a generic term for any 2D block cyclicly distributed array.
35 * Such a global array has an associated description vector DESCA.
36 * In the following comments, the character _ should be read as
37 * "of the global array".
38 *
39 * NOTATION STORED IN EXPLANATION
40 * --------------- -------------- --------------------------------------
41 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
42 * DTYPE_A = 1.
43 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
44 * the BLACS process grid A is distribu-
45 * ted over. The context itself is glo-
46 * bal, but the handle (the integer
47 * value) may vary.
48 * M_A (global) DESCA( M_ ) The number of rows in the global
49 * array A.
50 * N_A (global) DESCA( N_ ) The number of columns in the global
51 * array A.
52 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
53 * the rows of the array.
54 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
55 * the columns of the array.
56 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
57 * row of the array A is distributed.
58 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
59 * first column of the array A is
60 * distributed.
61 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
62 * array. LLD_A >= MAX(1,LOCr(M_A)).
63 *
64 * Let K be the number of rows or columns of a distributed matrix,
65 * and assume that its process grid has dimension p x q.
66 * LOCr( K ) denotes the number of elements of K that a process
67 * would receive if K were distributed over the p processes of its
68 * process column.
69 * Similarly, LOCc( K ) denotes the number of elements of K that a
70 * process would receive if K were distributed over the q processes of
71 * its process row.
72 * The values of LOCr() and LOCc() may be determined via a call to the
73 * ScaLAPACK tool function, NUMROC:
74 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
75 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
76 * An upper bound for these quantities may be computed by:
77 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
78 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
79 *
80 * Arguments
81 * =========
82 *
83 * UPLO (global input) CHARACTER
84 * Specifies whether the upper or lower triangular part of the
85 * symmetric distributed matrix sub( A ) is to be referenced:
86 * = 'U': Upper triangular
87 * = 'L': Lower triangular
88 *
89 * N (global input) INTEGER
90 * The number of rows and columns to be operated on, i.e. the
91 * order of the distributed submatrix sub( A ). N >= 0.
92 *
93 * A (input/output) REAL pointer into the local
94 * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
95 * On entry, the local pieces of the distributed symmetric
96 * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper
97 * triangular part of sub( A ) contains the upper triangular
98 * part of the matrix, and the strictly lower triangular part
99 * of sub( A ) is not referenced. If UPLO = 'L', the leading
100 * N-by-N lower triangular part of sub( A ) contains the lower
101 * triangular part of the matrix, and the strictly upper trian-
102 * gular part of sub( A ) is not referenced.
103 * On exit, if EQUED = 'Y', the equilibrated matrix:
104 * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)).
105 *
106 * IA (global input) INTEGER
107 * The row index in the global array A indicating the first
108 * row of sub( A ).
109 *
110 * JA (global input) INTEGER
111 * The column index in the global array A indicating the
112 * first column of sub( A ).
113 *
114 * DESCA (global and local input) INTEGER array of dimension DLEN_.
115 * The array descriptor for the distributed matrix A.
116 *
117 * SR (local input) REAL array, dimension LOCr(M_A)
118 * The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned
119 * with the distributed matrix A, and replicated across every
120 * process column. SR is tied to the distributed matrix A.
121 *
122 * SC (local input) REAL array, dimension LOCc(N_A)
123 * The scale factors for sub( A ). SC is aligned with the dis-
124 * tributed matrix A, and replicated down every process row.
125 * SC is tied to the distributed matrix A.
126 *
127 * SCOND (global input) REAL
128 * Ratio of the smallest SR(i) (respectively SC(j)) to the
129 * largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1
130 * and JA <= j <= JA+N-1.
131 *
132 * AMAX (global input) REAL
133 * Absolute value of the largest distributed submatrix entry.
134 *
135 * EQUED (output) CHARACTER*1
136 * Specifies whether or not equilibration was done.
137 * = 'N': No equilibration.
138 * = 'Y': Equilibration was done, i.e., sub( A ) has been re-
139 * placed by:
140 * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)).
141 *
142 * Internal Parameters
143 * ===================
144 *
145 * THRESH is a threshold value used to decide if scaling should be done
146 * based on the ratio of the scaling factors. If SCOND < THRESH,
147 * scaling is done.
148 *
149 * LARGE and SMALL are threshold values used to decide if scaling should
150 * be done based on the absolute size of the largest matrix element.
151 * If AMAX > LARGE or AMAX < SMALL, scaling is done.
152 *
153 * =====================================================================
154 *
155 * .. Parameters ..
156  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
157  $ lld_, mb_, m_, nb_, n_, rsrc_
158  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
159  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
160  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
161  REAL ONE, THRESH
162  parameter( one = 1.0e+0, thresh = 0.1e+0 )
163 * ..
164 * .. Local Scalars ..
165  INTEGER IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J,
166  $ jb, jj, jja, jn, kk, lda, ll, mycol, myrow, np,
167  $ npcol, nprow
168  REAL CJ, LARGE, SMALL
169 * ..
170 * .. External Subroutines ..
171  EXTERNAL blacs_gridinfo, infog2l
172 * ..
173 * .. External Functions ..
174  LOGICAL LSAME
175  INTEGER ICEIL, NUMROC
176  REAL PSLAMCH
177  EXTERNAL iceil, lsame, numroc, pslamch
178 * ..
179 * .. Intrinsic Functions ..
180  INTRINSIC min, mod
181 * ..
182 * .. Executable Statements ..
183 *
184 * Quick return if possible
185 *
186  IF( n.LE.0 ) THEN
187  equed = 'N'
188  RETURN
189  END IF
190 *
191 * Get grid parameters and compute local indexes
192 *
193  ictxt = desca( ctxt_ )
194  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
195  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
196  $ iarow, iacol )
197  lda = desca( lld_ )
198 *
199 * Initialize LARGE and SMALL.
200 *
201  small = pslamch( ictxt, 'Safe minimum' ) /
202  $ pslamch( ictxt, 'Precision' )
203  large = one / small
204 *
205  IF( scond.GE.thresh .AND. amax.GE.small .AND. amax.LE.large ) THEN
206 *
207 * No equilibration
208 *
209  equed = 'N'
210 *
211  ELSE
212 *
213  ii = iia
214  jj = jja
215  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
216  jb = jn-ja+1
217 *
218 * Replace A by diag(S) * A * diag(S).
219 *
220  IF( lsame( uplo, 'U' ) ) THEN
221 *
222 * Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored.
223 * Handle first block separately
224 *
225  ioffa = (jj-1)*lda
226  IF( mycol.EQ.iacol ) THEN
227  IF( myrow.EQ.iarow ) THEN
228  DO 20 ll = jj, jj + jb -1
229  cj = sc( ll )
230  DO 10 kk = iia, ii+ll-jj+1
231  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
232  10 CONTINUE
233  ioffa = ioffa + lda
234  20 CONTINUE
235  ELSE
236  ioffa = ioffa + jb*lda
237  END IF
238  jj = jj + jb
239  END IF
240 *
241  IF( myrow.EQ.iarow )
242  $ ii = ii + jb
243  iarow = mod( iarow+1, nprow )
244  iacol = mod( iacol+1, npcol )
245 *
246 * Loop over remaining block of columns
247 *
248  DO 70 j = jn+1, ja+n-1, desca( nb_ )
249  jb = min( ja+n-j, desca( nb_ ) )
250 *
251  IF( mycol.EQ.iacol ) THEN
252  IF( myrow.EQ.iarow ) THEN
253  DO 40 ll = jj, jj + jb -1
254  cj = sc( ll )
255  DO 30 kk = iia, ii+ll-jj+1
256  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
257  30 CONTINUE
258  ioffa = ioffa + lda
259  40 CONTINUE
260  ELSE
261  DO 60 ll = jj, jj + jb -1
262  cj = sc( ll )
263  DO 50 kk = iia, ii-1
264  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
265  50 CONTINUE
266  ioffa = ioffa + lda
267  60 CONTINUE
268  END IF
269  jj = jj + jb
270  END IF
271 *
272  IF( myrow.EQ.iarow )
273  $ ii = ii + jb
274  iarow = mod( iarow+1, nprow )
275  iacol = mod( iacol+1, npcol )
276 *
277  70 CONTINUE
278 *
279  ELSE
280 *
281 * Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored.
282 * Handle first block separately
283 *
284  iroff = mod( ia-1, desca( mb_ ) )
285  np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
286  IF( myrow.EQ.iarow )
287  $ np = np-iroff
288 *
289  ioffa = (jj-1)*lda
290  IF( mycol.EQ.iacol ) THEN
291  IF( myrow.EQ.iarow ) THEN
292  DO 90 ll = jj, jj + jb -1
293  cj = sc( ll )
294  DO 80 kk = ii+ll-jj, iia+np-1
295  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
296  80 CONTINUE
297  ioffa = ioffa + lda
298  90 CONTINUE
299  ELSE
300  DO 110 ll = jj, jj + jb -1
301  cj = sc( ll )
302  DO 100 kk = ii, iia+np-1
303  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
304  100 CONTINUE
305  ioffa = ioffa + lda
306  110 CONTINUE
307  END IF
308  jj = jj + jb
309  END IF
310 *
311  IF( myrow.EQ.iarow )
312  $ ii = ii + jb
313  iarow = mod( iarow+1, nprow )
314  iacol = mod( iacol+1, npcol )
315 *
316 * Loop over remaining block of columns
317 *
318  DO 160 j = jn+1, ja+n-1, desca( nb_ )
319  jb = min( ja+n-j, desca( nb_ ) )
320 *
321  IF( mycol.EQ.iacol ) THEN
322  IF( myrow.EQ.iarow ) THEN
323  DO 130 ll = jj, jj + jb -1
324  cj = sc( ll )
325  DO 120 kk = ii+ll-jj, iia+np-1
326  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
327  120 CONTINUE
328  ioffa = ioffa + lda
329  130 CONTINUE
330  ELSE
331  DO 150 ll = jj, jj + jb -1
332  cj = sc( ll )
333  DO 140 kk = ii, iia+np-1
334  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
335  140 CONTINUE
336  ioffa = ioffa + lda
337  150 CONTINUE
338  END IF
339  jj = jj + jb
340  END IF
341 *
342  IF( myrow.EQ.iarow )
343  $ ii = ii + jb
344  iarow = mod( iarow+1, nprow )
345  iacol = mod( iacol+1, npcol )
346 *
347  160 CONTINUE
348 *
349  END IF
350 *
351  equed = 'Y'
352 *
353  END IF
354 *
355  RETURN
356 *
357 * End of PSLAQSY
358 *
359  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslaqsy
subroutine pslaqsy(UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, EQUED)
Definition: pslaqsy.f:3
min
#define min(A, B)
Definition: pcgemr.c:181