ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzlaqsy.f
Go to the documentation of this file.
1  SUBROUTINE pzlaqsy( UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND,
2  $ AMAX, EQUED )
3 *
4 * -- ScaLAPACK auxiliary routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER EQUED, UPLO
11  INTEGER IA, JA, N
12  DOUBLE PRECISION AMAX, SCOND
13 * ..
14 * .. Array Arguments ..
15  INTEGER DESCA( * )
16  DOUBLE PRECISION SC( * ), SR( * )
17  COMPLEX*16 A( * )
18 * ..
19 *
20 * Purpose
21 * =======
22 *
23 * PZLAQSY equilibrates a symmetric distributed matrix
24 * sub( A ) = A(IA:IA+N-1,JA:JA+N-1) using the scaling factors in the
25 * vectors SR and SC.
26 *
27 * Notes
28 * =====
29 *
30 * Each global data object is described by an associated description
31 * vector. This vector stores the information required to establish
32 * the mapping between an object element and its corresponding process
33 * and memory location.
34 *
35 * Let A be a generic term for any 2D block cyclicly distributed array.
36 * Such a global array has an associated description vector DESCA.
37 * In the following comments, the character _ should be read as
38 * "of the global array".
39 *
40 * NOTATION STORED IN EXPLANATION
41 * --------------- -------------- --------------------------------------
42 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43 * DTYPE_A = 1.
44 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45 * the BLACS process grid A is distribu-
46 * ted over. The context itself is glo-
47 * bal, but the handle (the integer
48 * value) may vary.
49 * M_A (global) DESCA( M_ ) The number of rows in the global
50 * array A.
51 * N_A (global) DESCA( N_ ) The number of columns in the global
52 * array A.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54 * the rows of the array.
55 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56 * the columns of the array.
57 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58 * row of the array A is distributed.
59 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60 * first column of the array A is
61 * distributed.
62 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63 * array. LLD_A >= MAX(1,LOCr(M_A)).
64 *
65 * Let K be the number of rows or columns of a distributed matrix,
66 * and assume that its process grid has dimension p x q.
67 * LOCr( K ) denotes the number of elements of K that a process
68 * would receive if K were distributed over the p processes of its
69 * process column.
70 * Similarly, LOCc( K ) denotes the number of elements of K that a
71 * process would receive if K were distributed over the q processes of
72 * its process row.
73 * The values of LOCr() and LOCc() may be determined via a call to the
74 * ScaLAPACK tool function, NUMROC:
75 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77 * An upper bound for these quantities may be computed by:
78 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80 *
81 * Arguments
82 * =========
83 *
84 * UPLO (global input) CHARACTER
85 * Specifies whether the upper or lower triangular part of the
86 * symmetric distributed matrix sub( A ) is to be referenced:
87 * = 'U': Upper triangular
88 * = 'L': Lower triangular
89 *
90 * N (global input) INTEGER
91 * The number of rows and columns to be operated on, i.e. the
92 * order of the distributed submatrix sub( A ). N >= 0.
93 *
94 * A (input/output) COMPLEX*16 pointer into the local
95 * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
96 * On entry, the local pieces of the distributed symmetric
97 * matrix sub( A ). If UPLO = 'U', the leading N-by-N upper
98 * triangular part of sub( A ) contains the upper triangular
99 * part of the matrix, and the strictly lower triangular part
100 * of sub( A ) is not referenced. If UPLO = 'L', the leading
101 * N-by-N lower triangular part of sub( A ) contains the lower
102 * triangular part of the matrix, and the strictly upper trian-
103 * gular part of sub( A ) is not referenced.
104 * On exit, if EQUED = 'Y', the equilibrated matrix:
105 * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)).
106 *
107 * IA (global input) INTEGER
108 * The row index in the global array A indicating the first
109 * row of sub( A ).
110 *
111 * JA (global input) INTEGER
112 * The column index in the global array A indicating the
113 * first column of sub( A ).
114 *
115 * DESCA (global and local input) INTEGER array of dimension DLEN_.
116 * The array descriptor for the distributed matrix A.
117 *
118 * SR (local input) DOUBLE PRECISION array, dimension LOCr(M_A)
119 * The scale factors for A(IA:IA+M-1,JA:JA+N-1). SR is aligned
120 * with the distributed matrix A, and replicated across every
121 * process column. SR is tied to the distributed matrix A.
122 *
123 * SC (local input) DOUBLE PRECISION array, dimension LOCc(N_A)
124 * The scale factors for sub( A ). SC is aligned with the dis-
125 * tributed matrix A, and replicated down every process row.
126 * SC is tied to the distributed matrix A.
127 *
128 * SCOND (global input) DOUBLE PRECISION
129 * Ratio of the smallest SR(i) (respectively SC(j)) to the
130 * largest SR(i) (respectively SC(j)), with IA <= i <= IA+N-1
131 * and JA <= j <= JA+N-1.
132 *
133 * AMAX (global input) DOUBLE PRECISION
134 * Absolute value of the largest distributed submatrix entry.
135 *
136 * EQUED (output) CHARACTER*1
137 * Specifies whether or not equilibration was done.
138 * = 'N': No equilibration.
139 * = 'Y': Equilibration was done, i.e., sub( A ) has been re-
140 * placed by:
141 * diag(SR(IA:IA+N-1)) * sub( A ) * diag(SC(JA:JA+N-1)).
142 *
143 * Internal Parameters
144 * ===================
145 *
146 * THRESH is a threshold value used to decide if scaling should be done
147 * based on the ratio of the scaling factors. If SCOND < THRESH,
148 * scaling is done.
149 *
150 * LARGE and SMALL are threshold values used to decide if scaling should
151 * be done based on the absolute size of the largest matrix element.
152 * If AMAX > LARGE or AMAX < SMALL, scaling is done.
153 *
154 * =====================================================================
155 *
156 * .. Parameters ..
157  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
158  $ lld_, mb_, m_, nb_, n_, rsrc_
159  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
160  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
161  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
162  DOUBLE PRECISION ONE, THRESH
163  parameter( one = 1.0d+0, thresh = 0.1d+0 )
164 * ..
165 * .. Local Scalars ..
166  INTEGER IACOL, IAROW, ICTXT, II, IIA, IOFFA, IROFF, J,
167  $ jb, jj, jja, jn, kk, lda, ll, mycol, myrow, np,
168  $ npcol, nprow
169  DOUBLE PRECISION CJ, LARGE, SMALL
170 * ..
171 * .. External Subroutines ..
172  EXTERNAL blacs_gridinfo, infog2l
173 * ..
174 * .. External Functions ..
175  LOGICAL LSAME
176  INTEGER ICEIL, NUMROC
177  DOUBLE PRECISION PDLAMCH
178  EXTERNAL iceil, lsame, numroc, pdlamch
179 * ..
180 * .. Intrinsic Functions ..
181  INTRINSIC min, mod
182 * ..
183 * .. Executable Statements ..
184 *
185 * Quick return if possible
186 *
187  IF( n.LE.0 ) THEN
188  equed = 'N'
189  RETURN
190  END IF
191 *
192 * Get grid parameters and compute local indexes
193 *
194  ictxt = desca( ctxt_ )
195  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
196  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
197  $ iarow, iacol )
198  lda = desca( lld_ )
199 *
200 * Initialize LARGE and SMALL.
201 *
202  small = pdlamch( ictxt, 'Safe minimum' ) /
203  $ pdlamch( ictxt, 'Precision' )
204  large = one / small
205 *
206  IF( scond.GE.thresh .AND. amax.GE.small .AND. amax.LE.large ) THEN
207 *
208 * No equilibration
209 *
210  equed = 'N'
211 *
212  ELSE
213 *
214  ii = iia
215  jj = jja
216  jn = min( iceil( ja, desca( nb_ ) ) * desca( nb_ ), ja+n-1 )
217  jb = jn-ja+1
218 *
219 * Replace A by diag(S) * A * diag(S).
220 *
221  IF( lsame( uplo, 'U' ) ) THEN
222 *
223 * Upper triangle of A(IA:IA+N-1,JA:JA+N-1) is stored.
224 * Handle first block separately
225 *
226  ioffa = (jj-1)*lda
227  IF( mycol.EQ.iacol ) THEN
228  IF( myrow.EQ.iarow ) THEN
229  DO 20 ll = jj, jj + jb -1
230  cj = sc( ll )
231  DO 10 kk = iia, ii+ll-jj+1
232  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
233  10 CONTINUE
234  ioffa = ioffa + lda
235  20 CONTINUE
236  ELSE
237  ioffa = ioffa + jb*lda
238  END IF
239  jj = jj + jb
240  END IF
241 *
242  IF( myrow.EQ.iarow )
243  $ ii = ii + jb
244  iarow = mod( iarow+1, nprow )
245  iacol = mod( iacol+1, npcol )
246 *
247 * Loop over remaining block of columns
248 *
249  DO 70 j = jn+1, ja+n-1, desca( nb_ )
250  jb = min( ja+n-j, desca( nb_ ) )
251 *
252  IF( mycol.EQ.iacol ) THEN
253  IF( myrow.EQ.iarow ) THEN
254  DO 40 ll = jj, jj + jb -1
255  cj = sc( ll )
256  DO 30 kk = iia, ii+ll-jj+1
257  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
258  30 CONTINUE
259  ioffa = ioffa + lda
260  40 CONTINUE
261  ELSE
262  DO 60 ll = jj, jj + jb -1
263  cj = sc( ll )
264  DO 50 kk = iia, ii-1
265  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
266  50 CONTINUE
267  ioffa = ioffa + lda
268  60 CONTINUE
269  END IF
270  jj = jj + jb
271  END IF
272 *
273  IF( myrow.EQ.iarow )
274  $ ii = ii + jb
275  iarow = mod( iarow+1, nprow )
276  iacol = mod( iacol+1, npcol )
277 *
278  70 CONTINUE
279 *
280  ELSE
281 *
282 * Lower triangle of A(IA:IA+N-1,JA:JA+N-1) is stored.
283 * Handle first block separately
284 *
285  iroff = mod( ia-1, desca( mb_ ) )
286  np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
287  IF( myrow.EQ.iarow )
288  $ np = np-iroff
289 *
290  ioffa = (jj-1)*lda
291  IF( mycol.EQ.iacol ) THEN
292  IF( myrow.EQ.iarow ) THEN
293  DO 90 ll = jj, jj + jb -1
294  cj = sc( ll )
295  DO 80 kk = ii+ll-jj, iia+np-1
296  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
297  80 CONTINUE
298  ioffa = ioffa + lda
299  90 CONTINUE
300  ELSE
301  DO 110 ll = jj, jj + jb -1
302  cj = sc( ll )
303  DO 100 kk = ii, iia+np-1
304  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
305  100 CONTINUE
306  ioffa = ioffa + lda
307  110 CONTINUE
308  END IF
309  jj = jj + jb
310  END IF
311 *
312  IF( myrow.EQ.iarow )
313  $ ii = ii + jb
314  iarow = mod( iarow+1, nprow )
315  iacol = mod( iacol+1, npcol )
316 *
317 * Loop over remaining block of columns
318 *
319  DO 160 j = jn+1, ja+n-1, desca( nb_ )
320  jb = min( ja+n-j, desca( nb_ ) )
321 *
322  IF( mycol.EQ.iacol ) THEN
323  IF( myrow.EQ.iarow ) THEN
324  DO 130 ll = jj, jj + jb -1
325  cj = sc( ll )
326  DO 120 kk = ii+ll-jj, iia+np-1
327  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
328  120 CONTINUE
329  ioffa = ioffa + lda
330  130 CONTINUE
331  ELSE
332  DO 150 ll = jj, jj + jb -1
333  cj = sc( ll )
334  DO 140 kk = ii, iia+np-1
335  a( ioffa + kk ) = cj*sr( kk )*a( ioffa + kk )
336  140 CONTINUE
337  ioffa = ioffa + lda
338  150 CONTINUE
339  END IF
340  jj = jj + jb
341  END IF
342 *
343  IF( myrow.EQ.iarow )
344  $ ii = ii + jb
345  iarow = mod( iarow+1, nprow )
346  iacol = mod( iacol+1, npcol )
347 *
348  160 CONTINUE
349 *
350  END IF
351 *
352  equed = 'Y'
353 *
354  END IF
355 *
356  RETURN
357 *
358 * End of PZLAQSY
359 *
360  END
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzlaqsy
subroutine pzlaqsy(UPLO, N, A, IA, JA, DESCA, SR, SC, SCOND, AMAX, EQUED)
Definition: pzlaqsy.f:3
min
#define min(A, B)
Definition: pcgemr.c:181