SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdnepfchk.f
Go to the documentation of this file.
1 SUBROUTINE pdnepfchk( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ,
2 $ DESCZ, ANORM, FRESID, WORK )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 INTEGER IA, IASEED, IZ, JA, JZ, N
11 DOUBLE PRECISION ANORM, FRESID
12* ..
13* .. Array Arguments ..
14 INTEGER DESCA( * ), DESCZ( * )
15 DOUBLE PRECISION A( * ), WORK( * ), Z( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PDNEPFCHK computes the residual
22* || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N),
23* where Ao will be regenerated by the parallel random matrix generator,
24* sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1)
25* and ||.|| stands for the infinity norm.
26*
27* Notes
28* =====
29*
30* Each global data object is described by an associated description
31* vector. This vector stores the information required to establish
32* the mapping between an object element and its corresponding process
33* and memory location.
34*
35* Let A be a generic term for any 2D block cyclicly distributed array.
36* Such a global array has an associated description vector DESCA.
37* In the following comments, the character _ should be read as
38* "of the global array".
39*
40* NOTATION STORED IN EXPLANATION
41* --------------- -------------- --------------------------------------
42* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43* DTYPE_A = 1.
44* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45* the BLACS process grid A is distribu-
46* ted over. The context itself is glo-
47* bal, but the handle (the integer
48* value) may vary.
49* M_A (global) DESCA( M_ ) The number of rows in the global
50* array A.
51* N_A (global) DESCA( N_ ) The number of columns in the global
52* array A.
53* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54* the rows of the array.
55* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56* the columns of the array.
57* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58* row of the array A is distributed.
59* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60* first column of the array A is
61* distributed.
62* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63* array. LLD_A >= MAX(1,LOCr(M_A)).
64*
65* Let K be the number of rows or columns of a distributed matrix,
66* and assume that its process grid has dimension p x q.
67* LOCr( K ) denotes the number of elements of K that a process
68* would receive if K were distributed over the p processes of its
69* process column.
70* Similarly, LOCc( K ) denotes the number of elements of K that a
71* process would receive if K were distributed over the q processes of
72* its process row.
73* The values of LOCr() and LOCc() may be determined via a call to the
74* ScaLAPACK tool function, NUMROC:
75* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77* An upper bound for these quantities may be computed by:
78* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80*
81* Arguments
82* =========
83*
84* N (global input) INTEGER
85* The order of sub( A ) and sub( Z ). N >= 0.
86*
87* A (local input/local output) DOUBLE PRECISION pointer into the
88* local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
89* On entry, this array contains the local pieces of the N-by-N
90* distributed matrix sub( A ) to be checked. On exit, this
91* array contains the local pieces of the difference
92* sub(Z)*sub( A )*sub(Z)**T - sub( Ao ).
93*
94* IA (global input) INTEGER
95* A's global row index, which points to the beginning of the
96* submatrix which is to be operated on.
97*
98* JA (global input) INTEGER
99* A's global column index, which points to the beginning of
100* the submatrix which is to be operated on.
101*
102* DESCA (global and local input) INTEGER array of dimension DLEN_.
103* The array descriptor for the distributed matrix A.
104*
105* IASEED (global input) INTEGER
106* The seed number to generate the original matrix Ao.
107*
108* Z (local input) DOUBLE PRECISION pointer into the local memory
109* to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this
110* array contains the local pieces of the N-by-N distributed
111* matrix sub( Z ).
112*
113* IZ (global input) INTEGER
114* Z's global row index, which points to the beginning of the
115* submatrix which is to be operated on.
116*
117* JZ (global input) INTEGER
118* Z's global column index, which points to the beginning of
119* the submatrix which is to be operated on.
120*
121* DESCZ (global and local input) INTEGER array of dimension DLEN_.
122* The array descriptor for the distributed matrix Z.
123*
124* ANORM (global input) DOUBLE PRECISION
125* The Infinity norm of sub( A ).
126*
127* FRESID (global output) DOUBLE PRECISION
128* The maximum (worst) factorizational error.
129*
130* WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK).
131* LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where
132*
133* IROFFA = MOD( IA-1, MB_A ),
134* ICOFFA = MOD( JA-1, NB_A ),
135* IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
136* IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
137* NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ),
138* NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
139*
140* WORK is used to store a block of rows and a block of columns
141* of sub( A ).
142* INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
143* MYCOL, NPROW and NPCOL can be determined by calling the
144* subroutine BLACS_GRIDINFO.
145*
146* =====================================================================
147*
148* .. Parameters ..
149 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
150 $ lld_, mb_, m_, nb_, n_, rsrc_
151 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
152 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
153 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
154 DOUBLE PRECISION ONE, ZERO
155 parameter( one = 1.0d+0, zero = 0.0d+0 )
156* ..
157* .. Local Scalars ..
158 INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF,
159 $ iw, j, jb, jja, jn, lda, ldw, mycol, myrow, np,
160 $ npcol, nprow
161 DOUBLE PRECISION EPS
162* ..
163* .. Local Arrays ..
164 INTEGER DESCW( DLEN_ )
165* ..
166* .. External Subroutines ..
167 EXTERNAL blacs_gridinfo, descset, dmatadd, infog2l,
168 $ pdgemm, pdlacpy, pdlaset, pdmatgen
169* ..
170* .. External Functions ..
171 INTEGER ICEIL, NUMROC
172 DOUBLE PRECISION PDLAMCH, PDLANGE
173 EXTERNAL iceil, numroc, pdlamch, pdlange
174* ..
175* .. Intrinsic Functions ..
176 INTRINSIC max, min, mod
177* ..
178* .. Executable Statements ..
179*
180 ictxt = desca( ctxt_ )
181 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
182 eps = pdlamch( ictxt, 'eps' )
183*
184 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
185 $ iarow, iacol )
186 iroff = mod( ia-1, desca( mb_ ) )
187 np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
188 IF( myrow.EQ.iarow )
189 $ np = np - iroff
190 ldw = max( 1, np )
191*
192* First compute H <- H * Z**T
193*
194 CALL descset( descw, desca( mb_ ), n, desca( mb_ ), desca( nb_ ),
195 $ iarow, iacol, ictxt, desca( mb_ ) )
196*
197 DO 10 i = ia, ia + n - 1, desca( mb_ )
198 ib = min( ia+n-i, desca( mb_ ) )
199*
200 CALL pdlacpy( 'All', ib, n, a, i, ja, desca, work, 1, 1,
201 $ descw )
202 CALL pdgemm( 'No transpose', 'Transpose', ib, n, n, one, work,
203 $ 1, 1, descw, z, iz, jz, descz, zero, a, i, ja,
204 $ desca )
205*
206 descw( rsrc_ ) = mod( descw( rsrc_ )+1, nprow )
207*
208 10 CONTINUE
209*
210* Then compute H <- Z * H = Z * H0 * Z**T
211*
212 CALL descset( descw, n, desca( nb_ ), desca( mb_ ), desca( nb_ ),
213 $ iarow, iacol, ictxt, ldw )
214*
215 DO 20 j = ja, ja + n - 1, desca( nb_ )
216 jb = min( ja+n-j, desca( nb_ ) )
217*
218 CALL pdlacpy( 'All', n, jb, a, ia, j, desca, work, 1, 1,
219 $ descw )
220 CALL pdgemm( 'No transpose', 'No transpose', n, jb, n, one, z,
221 $ iz, jz, descz, work, 1, 1, descw, zero, a, ia, j,
222 $ desca )
223*
224 descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
225*
226 20 CONTINUE
227*
228* Compute H - H0
229*
230 jn = min( iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+n-1 )
231 lda = desca( lld_ )
232 ioffa = iia + ( jja-1 )*lda
233 iw = 1
234 jb = jn - ja + 1
235 descw( csrc_ ) = iacol
236*
237* Handle first block of columns separately
238*
239 IF( mycol.EQ.descw( csrc_ ) ) THEN
240 CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
241 $ desca( mb_ ), desca( nb_ ), work, ldw,
242 $ desca( rsrc_ ), desca( csrc_ ), iaseed, iia-1,
243 $ np, jja-1, jb, myrow, mycol, nprow, npcol )
244 CALL pdlaset( 'Lower', max( 0, n-2 ), jb, zero, zero, work,
245 $ min( iw+2, n ), 1, descw )
246 CALL dmatadd( np, jb, -one, work, ldw, one, a( ioffa ), lda )
247 jja = jja + jb
248 ioffa = ioffa + jb*lda
249 END IF
250*
251 iw = iw + desca( mb_ )
252 descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
253*
254 DO 30 j = jn + 1, ja + n - 1, desca( nb_ )
255 jb = min( ja+n-j, desca( nb_ ) )
256*
257 IF( mycol.EQ.descw( csrc_ ) ) THEN
258 CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
259 $ desca( mb_ ), desca( nb_ ), work, ldw,
260 $ desca( rsrc_ ), desca( csrc_ ), iaseed,
261 $ iia-1, np, jja-1, jb, myrow, mycol, nprow,
262 $ npcol )
263 CALL pdlaset( 'Lower', max( 0, n-iw-1 ), jb, zero, zero,
264 $ work, min( n, iw+2 ), 1, descw )
265 CALL dmatadd( np, jb, -one, work, ldw, one, a( ioffa ),
266 $ lda )
267 jja = jja + jb
268 ioffa = ioffa + jb*lda
269 END IF
270 iw = iw + desca( mb_ )
271 descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
272 30 CONTINUE
273*
274* Calculate factor residual
275*
276 fresid = pdlange( 'I', n, n, a, ia, ja, desca, work ) /
277 $ ( n*eps*anorm )
278*
279 RETURN
280*
281* End PDNEPFCHK
282*
283 END
subroutine pdmatgen(ictxt, aform, diag, m, n, mb, nb, a, lda, iarow, iacol, iseed, iroff, irnum, icoff, icnum, myrow, mycol, nprow, npcol)
Definition pdmatgen.f:4
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine dmatadd(m, n, alpha, a, lda, beta, c, ldc)
Definition dmatadd.f:2
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pdlaset(uplo, m, n, alpha, beta, a, ia, ja, desca)
Definition pdblastst.f:6862
subroutine pdlacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pdlacpy.f:3
subroutine pdnepfchk(n, a, ia, ja, desca, iaseed, z, iz, jz, descz, anorm, fresid, work)
Definition pdnepfchk.f:3