ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzlaschk.f
Go to the documentation of this file.
1  SUBROUTINE pzlaschk( SYMM, DIAG, N, NRHS, X, IX, JX, DESCX,
2  $ IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID,
3  $ WORK )
4 *
5 * -- ScaLAPACK auxiliary routine (version 1.7) --
6 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
7 * and University of California, Berkeley.
8 * May 1, 1997
9 *
10 * .. Scalar Arguments ..
11  CHARACTER DIAG, SYMM
12  INTEGER IA, IASEED, IBSEED, IX, JA, JX, N, NRHS
13  DOUBLE PRECISION ANORM, RESID
14 * ..
15 * .. Array Arguments ..
16  INTEGER DESCA( * ), DESCX( * )
17  COMPLEX*16 WORK( * ), X( * )
18 * ..
19 *
20 * Purpose
21 * =======
22 *
23 * PZLASCHK computes the residual
24 * || sub( A )*sub( X ) - B || / (|| sub( A ) ||*|| sub( X ) ||*eps*N)
25 * to check the accuracy of the factorization and solve steps in the
26 * LU and Cholesky decompositions, where sub( A ) denotes
27 * A(IA:IA+N-1,JA:JA+N-1), sub( X ) denotes X(IX:IX+N-1, JX:JX+NRHS-1).
28 *
29 * Notes
30 * =====
31 *
32 * Each global data object is described by an associated description
33 * vector. This vector stores the information required to establish
34 * the mapping between an object element and its corresponding process
35 * and memory location.
36 *
37 * Let A be a generic term for any 2D block cyclically distributed array.
38 * Such a global array has an associated description vector DESCA.
39 * In the following comments, the character _ should be read as
40 * "of the global array".
41 *
42 * NOTATION STORED IN EXPLANATION
43 * --------------- -------------- --------------------------------------
44 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
45 * DTYPE_A = 1.
46 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
47 * the BLACS process grid A is distribu-
48 * ted over. The context itself is glo-
49 * bal, but the handle (the integer
50 * value) may vary.
51 * M_A (global) DESCA( M_ ) The number of rows in the global
52 * array A.
53 * N_A (global) DESCA( N_ ) The number of columns in the global
54 * array A.
55 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
56 * the rows of the array.
57 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
58 * the columns of the array.
59 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
60 * row of the array A is distributed.
61 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
62 * first column of the array A is
63 * distributed.
64 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
65 * array. LLD_A >= MAX(1,LOCr(M_A)).
66 *
67 * Let K be the number of rows or columns of a distributed matrix,
68 * and assume that its process grid has dimension p x q.
69 * LOCr( K ) denotes the number of elements of K that a process
70 * would receive if K were distributed over the p processes of its
71 * process column.
72 * Similarly, LOCc( K ) denotes the number of elements of K that a
73 * process would receive if K were distributed over the q processes of
74 * its process row.
75 * The values of LOCr() and LOCc() may be determined via a call to the
76 * ScaLAPACK tool function, NUMROC:
77 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
78 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
79 * An upper bound for these quantities may be computed by:
80 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
81 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
82 *
83 * Arguments
84 * =========
85 *
86 * SYMM (global input) CHARACTER
87 * If SYMM = 'H', sub( A ) is a Hermitian distributed matrix,
88 * otherwise sub( A ) is a general distributed matrix.
89 *
90 * DIAG (global input) CHARACTER
91 * If DIAG = 'D', sub( A ) is diagonally dominant.
92 *
93 * N (global input) INTEGER
94 * The number of columns to be operated on, i.e. the number of
95 * columns of the distributed submatrix sub( A ). N >= 0.
96 *
97 * NRHS (global input) INTEGER
98 * The number of right-hand-sides, i.e. the number of columns
99 * of the distributed matrix sub( X ). NRHS >= 0.
100 *
101 * X (local input) COMPLEX*16 pointer into the local memory
102 * to an array of dimension (LLD_X,LOCc(JX+NRHS-1)). This array
103 * contains the local pieces of the answer vector(s) sub( X ) of
104 * sub( A ) sub( X ) - B, split up over a column of processes.
105 *
106 * IX (global input) INTEGER
107 * The row index in the global array X indicating the first
108 * row of sub( X ).
109 *
110 * JX (global input) INTEGER
111 * The column index in the global array X indicating the
112 * first column of sub( X ).
113 *
114 * DESCX (global and local input) INTEGER array of dimension DLEN_.
115 * The array descriptor for the distributed matrix X.
116 *
117 * IASEED (global input) INTEGER
118 * The seed number to generate the original matrix Ao.
119 *
120 * IA (global input) INTEGER
121 * The row index in the global array A indicating the first
122 * row of sub( A ).
123 *
124 * JA (global input) INTEGER
125 * The column index in the global array A indicating the
126 * first column of sub( A ).
127 *
128 * DESCA (global and local input) INTEGER array of dimension DLEN_.
129 * The array descriptor for the distributed matrix A.
130 *
131 * IBSEED (global input) INTEGER
132 * The seed number to generate the original matrix B.
133 *
134 * ANORM (global input) DOUBLE PRECISION
135 * The 1-norm or infinity norm of the distributed matrix
136 * sub( A ).
137 *
138 * RESID (global output) DOUBLE PRECISION
139 * The residual error:
140 * ||sub( A )*sub( X )-B|| / (||sub( A )||*||sub( X )||*eps*N).
141 *
142 * WORK (local workspace) COMPLEX*16 array, dimension (LWORK)
143 * LWORK >= MAX(1,Np)*NB_X + Nq*NB_X + MAX( MAX(NQ*MB_A,2*NB_X),
144 * NB_X * NUMROC( NUMROC(N,MB_X,0,0,NPCOL), MB_X, 0, 0, LCMQ ) )
145 *
146 * =====================================================================
147 *
148 * .. Parameters ..
149  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
150  $ LLD_, MB_, M_, NB_, N_, RSRC_
151  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
152  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
153  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
154  COMPLEX*16 ZERO, ONE
155  PARAMETER ( ONE = ( 1.0d+0, 0.0d+0 ),
156  $ zero = ( 0.0d+0, 0.0d+0 ) )
157 * ..
158 * .. Local Scalars ..
159  INTEGER IACOL, IAROW, IB, ICOFF, ICTXT, ICURCOL, IDUMM,
160  $ II, IIA, IIX, IOFFX, IPA, IPB, IPW, IPX, IROFF,
161  $ ixcol, ixrow, j, jbrhs, jj, jja, jjx, ldx,
162  $ mycol, myrow, np, npcol, nprow, nq
163  DOUBLE PRECISION DIVISOR, EPS, RESID1
164  COMPLEX*16 BETA
165 * ..
166 * .. External Subroutines ..
167  EXTERNAL blacs_gridinfo, dgebr2d, dgebs2d,
168  $ dgerv2d, dgesd2d, pbztran,
169  $ pzmatgen, zgamx2d, zgemm, zgsum2d,
170  $ zlaset
171 * ..
172 * .. External Functions ..
173  INTEGER IZAMAX, NUMROC
174  DOUBLE PRECISION PDLAMCH
175  EXTERNAL izamax, numroc, pdlamch
176 * ..
177 * .. Intrinsic Functions ..
178  INTRINSIC abs, dble, max, min, mod
179 * ..
180 * .. Executable Statements ..
181 *
182 * Get needed initial parameters
183 *
184  ictxt = desca( ctxt_ )
185  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
186 *
187  eps = pdlamch( ictxt, 'eps' )
188  resid = 0.0d+0
189  divisor = anorm * eps * dble( n )
190 *
191  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
192  $ iarow, iacol )
193  CALL infog2l( ix, jx, descx, nprow, npcol, myrow, mycol, iix, jjx,
194  $ ixrow, ixcol )
195  iroff = mod( ia-1, desca( mb_ ) )
196  icoff = mod( ja-1, desca( nb_ ) )
197  np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
198  nq = numroc( n+icoff, desca( nb_ ), mycol, iacol, npcol )
199 * Partition WORK: B at IPB, transposed X at IPX, A rows/scratch at IPA
200  ldx = max( 1, np )
201  ipb = 1
202  ipx = ipb + np * descx( nb_ )
203  ipa = ipx + nq * descx( nb_ )
204 * Remove the offset rows/columns on process row IAROW / column IACOL
205  IF( myrow.EQ.iarow )
206  $ np = np - iroff
207  IF( mycol.EQ.iacol )
208  $ nq = nq - icoff
209 *
210  icurcol = ixcol
211 *
212 * Loop over the rhs
213 *
214  DO 40 j = 1, nrhs, descx( nb_ )
215  jbrhs = min( descx( nb_ ), nrhs-j+1 )
216 *
217 * Transpose x from ICURCOL to all rows
218 *
219  ioffx = iix + ( jjx - 1 ) * descx( lld_ )
220  CALL pbztran( ictxt, 'Column', 'Transpose', n, jbrhs,
221  $ descx( mb_ ), x( ioffx ), descx( lld_ ), zero,
222  $ work( ipx ), jbrhs, ixrow, icurcol, -1, iacol,
223  $ work( ipa ) )
224 *
225 * Regenerate B in ICURCOL
226 *
227  IF( mycol.EQ.icurcol ) THEN
228  CALL pzmatgen( ictxt, 'N', 'N', descx( m_ ), descx( n_ ),
229  $ descx( mb_ ), descx( nb_ ), work( ipb ), ldx,
230  $ ixrow, ixcol, ibseed, iix-1, np, jjx-1,
231  $ jbrhs, myrow, mycol, nprow, npcol )
232  beta = one
233  ELSE
234  beta = zero
235  END IF
236 *
237  IF( nq.GT.0 ) THEN
238  DO 10 ii = iia, iia+np-1, desca( mb_ )
239  ib = min( desca( mb_ ), iia+np-ii )
240 *
241 * Regenerate ib rows of the matrix A(IA:IA+N-1,JA:JA+N-1).
242 *
243  CALL pzmatgen( ictxt, symm, diag, desca( m_ ),
244  $ desca( n_ ), desca( mb_ ), desca( nb_ ),
245  $ work( ipa ), ib, desca( rsrc_ ),
246  $ desca( csrc_ ), iaseed, ii-1, ib,
247  $ jja-1, nq, myrow, mycol, nprow, npcol )
248 *
249 * Compute B <= B - A * X.
250 *
251  CALL zgemm( 'No transpose', 'Transpose', ib, jbrhs, nq,
252  $ -one, work( ipa ), ib, work( ipx ), jbrhs,
253  $ beta, work( ipb+ii-iia ), ldx )
254 *
255  10 CONTINUE
256 *
257  ELSE IF( mycol.NE.icurcol ) THEN
258 *
259  CALL zlaset( 'All', np, jbrhs, zero, zero, work( ipb ),
260  $ ldx )
261 *
262  END IF
263 *
264 * Add B rowwise to ICURCOL
265 *
266  CALL zgsum2d( ictxt, 'Row', ' ', np, jbrhs, work( ipb ), ldx,
267  $ myrow, icurcol )
268 *
269  IF( mycol.EQ.icurcol ) THEN
270 *
271 * Figure || A * X - B || & || X ||
272 *
273  ipw = ipa + jbrhs
274  DO 20 jj = 0, jbrhs - 1
275  IF( np.GT.0 ) THEN
276  ii = izamax( np, work( ipb+jj*ldx ), 1 )
277  work( ipa+jj ) = abs( work( ipb+ii-1+jj*ldx ) )
278  work( ipw+jj ) = abs( x( ioffx + izamax( np,
279  $ x( ioffx + jj*descx( lld_ ) ), 1 )-1+jj*
280  $ descx( lld_ ) ) )
281  ELSE
282  work( ipa+jj ) = zero
283  work( ipw+jj ) = zero
284  END IF
285  20 CONTINUE
286 *
287 * After ZGAMX2D computation,
288 * WORK(IPA:IPA+JBRHS-1) has the maximum of || Ax - b ||, and
289 * WORK(IPW:IPW+JBRHS-1) has the maximum of || X ||.
290 *
291  CALL zgamx2d( ictxt, 'Column', ' ', 1, 2*jbrhs,
292  $ work( ipa ), 1, idumm, idumm, -1, 0, icurcol )
293 *
294 * Calculate residual = ||Ax-b|| / (||x||*||A||*eps*N)
295 *
296  IF( myrow.EQ.0 ) THEN
297  DO 30 jj = 0, jbrhs - 1
298  resid1 = dble( work( ipa+jj ) ) /
299  $ ( dble( work( ipw+jj ) )*divisor )
300  IF( resid.LT.resid1 )
301  $ resid = resid1
302  30 CONTINUE
303  IF( mycol.NE.0 )
304  $ CALL dgesd2d( ictxt, 1, 1, resid, 1, 0, 0 )
305  END IF
306 *
307  ELSE IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
308 *
309  CALL dgerv2d( ictxt, 1, 1, resid1, 1, 0, icurcol )
310  IF( resid.LT.resid1 )
311  $ resid = resid1
312 *
313  END IF
314 *
315  IF( mycol.EQ.icurcol )
316  $ jjx = jjx + jbrhs
317  icurcol = mod( icurcol+1, npcol )
318 *
319  40 CONTINUE
320 *
321  IF( myrow.EQ.0 .AND. mycol.EQ.0 ) THEN
322  CALL dgebs2d( ictxt, 'All', ' ', 1, 1, resid, 1 )
323  ELSE
324  CALL dgebr2d( ictxt, 'All', ' ', 1, 1, resid, 1, 0, 0 )
325  END IF
326 *
327  RETURN
328 *
329 * End of PZLASCHK
330 *
331  END
pbztran
subroutine pbztran(ICONTXT, ADIST, TRANS, M, N, NB, A, LDA, BETA, C, LDC, IAROW, IACOL, ICROW, ICCOL, WORK)
Definition: pbztran.f:3
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzmatgen
subroutine pzmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pzmatgen.f:4
pzlaschk
subroutine pzlaschk(SYMM, DIAG, N, NRHS, X, IX, JX, DESCX, IASEED, IA, JA, DESCA, IBSEED, ANORM, RESID, WORK)
Definition: pzlaschk.f:4
min
#define min(A, B)
Definition: pcgemr.c:181