ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdnepfchk.f
Go to the documentation of this file.
1  SUBROUTINE pdnepfchk( N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ,
2  $ DESCZ, ANORM, FRESID, WORK )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  INTEGER IA, IASEED, IZ, JA, JZ, N
11  DOUBLE PRECISION ANORM, FRESID
12 * ..
13 * .. Array Arguments ..
14  INTEGER DESCA( * ), DESCZ( * )
15  DOUBLE PRECISION A( * ), WORK( * ), Z( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PDNEPFCHK computes the residual
22 * || sub(Z)*sub( A )*sub(Z)**T - sub( Ao ) || / (||sub( Ao )||*eps*N),
23 * where Ao will be regenerated by the parallel random matrix generator,
24 * sub( A ) = A(IA:IA+M-1,JA:JA+N-1), sub( Z ) = Z(IZ:IZ+N-1,JZ:JZ+N-1)
25 * and ||.|| stands for the infinity norm.
26 *
27 * Notes
28 * =====
29 *
30 * Each global data object is described by an associated description
31 * vector. This vector stores the information required to establish
32 * the mapping between an object element and its corresponding process
33 * and memory location.
34 *
35 * Let A be a generic term for any 2D block cyclicly distributed array.
36 * Such a global array has an associated description vector DESCA.
37 * In the following comments, the character _ should be read as
38 * "of the global array".
39 *
40 * NOTATION STORED IN EXPLANATION
41 * --------------- -------------- --------------------------------------
42 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
43 * DTYPE_A = 1.
44 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
45 * the BLACS process grid A is distribu-
46 * ted over. The context itself is glo-
47 * bal, but the handle (the integer
48 * value) may vary.
49 * M_A (global) DESCA( M_ ) The number of rows in the global
50 * array A.
51 * N_A (global) DESCA( N_ ) The number of columns in the global
52 * array A.
53 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
54 * the rows of the array.
55 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
56 * the columns of the array.
57 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
58 * row of the array A is distributed.
59 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
60 * first column of the array A is
61 * distributed.
62 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
63 * array. LLD_A >= MAX(1,LOCr(M_A)).
64 *
65 * Let K be the number of rows or columns of a distributed matrix,
66 * and assume that its process grid has dimension p x q.
67 * LOCr( K ) denotes the number of elements of K that a process
68 * would receive if K were distributed over the p processes of its
69 * process column.
70 * Similarly, LOCc( K ) denotes the number of elements of K that a
71 * process would receive if K were distributed over the q processes of
72 * its process row.
73 * The values of LOCr() and LOCc() may be determined via a call to the
74 * ScaLAPACK tool function, NUMROC:
75 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
76 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
77 * An upper bound for these quantities may be computed by:
78 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
79 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
80 *
81 * Arguments
82 * =========
83 *
84 * N (global input) INTEGER
85 * The order of sub( A ) and sub( Z ). N >= 0.
86 *
87 * A (local input/local output) DOUBLE PRECISION pointer into the
88 * local memory to an array of dimension (LLD_A,LOCc(JA+N-1)).
89 * On entry, this array contains the local pieces of the N-by-N
90 * distributed matrix sub( A ) to be checked. On exit, this
91 * array contains the local pieces of the difference
92 * sub(Z)*sub( A )*sub(Z)**T - sub( Ao ).
93 *
94 * IA (global input) INTEGER
95 * A's global row index, which points to the beginning of the
96 * submatrix which is to be operated on.
97 *
98 * JA (global input) INTEGER
99 * A's global column index, which points to the beginning of
100 * the submatrix which is to be operated on.
101 *
102 * DESCA (global and local input) INTEGER array of dimension DLEN_.
103 * The array descriptor for the distributed matrix A.
104 *
105 * IASEED (global input) INTEGER
106 * The seed number to generate the original matrix Ao.
107 *
108 * Z (local input) DOUBLE PRECISION pointer into the local memory
109 * to an array of dimension (LLD_Z,LOCc(JZ+N-1)). On entry, this
110 * array contains the local pieces of the N-by-N distributed
111 * matrix sub( Z ).
112 *
113 * IZ (global input) INTEGER
114 * Z's global row index, which points to the beginning of the
115 * submatrix which is to be operated on.
116 *
117 * JZ (global input) INTEGER
118 * Z's global column index, which points to the beginning of
119 * the submatrix which is to be operated on.
120 *
121 * DESCZ (global and local input) INTEGER array of dimension DLEN_.
122 * The array descriptor for the distributed matrix Z.
123 *
124 * ANORM (global input) DOUBLE PRECISION
125 * The Infinity norm of sub( A ).
126 *
127 * FRESID (global output) DOUBLE PRECISION
128 * The maximum (worst) factorizational error.
129 *
130 * WORK (local workspace) DOUBLE PRECISION array, dimension (LWORK).
131 * LWORK >= MAX( NpA0 * NB_A, MB_A * NqA0 ) where
132 *
133 * IROFFA = MOD( IA-1, MB_A ),
134 * ICOFFA = MOD( JA-1, NB_A ),
135 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
136 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
137 * NpA0 = NUMROC( N+IROFFA, MB_A, MYROW, IAROW, NPROW ),
138 * NqA0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
139 *
140 * WORK is used to store a block of rows and a block of columns
141 * of sub( A ).
142 * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
143 * MYCOL, NPROW and NPCOL can be determined by calling the
144 * subroutine BLACS_GRIDINFO.
145 *
146 * =====================================================================
147 *
148 * .. Parameters ..
149  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
150  $ lld_, mb_, m_, nb_, n_, rsrc_
151  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
152  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
153  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
154  DOUBLE PRECISION ONE, ZERO
155  parameter( one = 1.0d+0, zero = 0.0d+0 )
156 * ..
157 * .. Local Scalars ..
158  INTEGER I, IACOL, IAROW, IB, ICTXT, IIA, IOFFA, IROFF,
159  $ iw, j, jb, jja, jn, lda, ldw, mycol, myrow, np,
160  $ npcol, nprow
161  DOUBLE PRECISION EPS
162 * ..
163 * .. Local Arrays ..
164  INTEGER DESCW( DLEN_ )
165 * ..
166 * .. External Subroutines ..
167  EXTERNAL blacs_gridinfo, descset, dmatadd, infog2l,
168  $ pdgemm, pdlacpy, pdlaset, pdmatgen
169 * ..
170 * .. External Functions ..
171  INTEGER ICEIL, NUMROC
172  DOUBLE PRECISION PDLAMCH, PDLANGE
173  EXTERNAL iceil, numroc, pdlamch, pdlange
174 * ..
175 * .. Intrinsic Functions ..
176  INTRINSIC max, min, mod
177 * ..
178 * .. Executable Statements ..
179 *
180  ictxt = desca( ctxt_ )
181  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
182  eps = pdlamch( ictxt, 'eps' )
183 *
184  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia, jja,
185  $ iarow, iacol )
186  iroff = mod( ia-1, desca( mb_ ) )
187  np = numroc( n+iroff, desca( mb_ ), myrow, iarow, nprow )
188  IF( myrow.EQ.iarow )
189  $ np = np - iroff
190  ldw = max( 1, np )
191 *
192 * First compute H <- H * Z**T
193 *
194  CALL descset( descw, desca( mb_ ), n, desca( mb_ ), desca( nb_ ),
195  $ iarow, iacol, ictxt, desca( mb_ ) )
196 *
197  DO 10 i = ia, ia + n - 1, desca( mb_ )
198  ib = min( ia+n-i, desca( mb_ ) )
199 *
200  CALL pdlacpy( 'All', ib, n, a, i, ja, desca, work, 1, 1,
201  $ descw )
202  CALL pdgemm( 'No transpose', 'Transpose', ib, n, n, one, work,
203  $ 1, 1, descw, z, iz, jz, descz, zero, a, i, ja,
204  $ desca )
205 *
206  descw( rsrc_ ) = mod( descw( rsrc_ )+1, nprow )
207 *
208  10 CONTINUE
209 *
210 * Then compute H <- Z * H = Z * H0 * Z**T
211 *
212  CALL descset( descw, n, desca( nb_ ), desca( mb_ ), desca( nb_ ),
213  $ iarow, iacol, ictxt, ldw )
214 *
215  DO 20 j = ja, ja + n - 1, desca( nb_ )
216  jb = min( ja+n-j, desca( nb_ ) )
217 *
218  CALL pdlacpy( 'All', n, jb, a, ia, j, desca, work, 1, 1,
219  $ descw )
220  CALL pdgemm( 'No transpose', 'No transpose', n, jb, n, one, z,
221  $ iz, jz, descz, work, 1, 1, descw, zero, a, ia, j,
222  $ desca )
223 *
224  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
225 *
226  20 CONTINUE
227 *
228 * Compute H - H0
229 *
230  jn = min( iceil( ja, desca( nb_ ) )*desca( nb_ ), ja+n-1 )
231  lda = desca( lld_ )
232  ioffa = iia + ( jja-1 )*lda
233  iw = 1
234  jb = jn - ja + 1
235  descw( csrc_ ) = iacol
236 *
237 * Handle first block of columns separately
238 *
239  IF( mycol.EQ.descw( csrc_ ) ) THEN
240  CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
241  $ desca( mb_ ), desca( nb_ ), work, ldw,
242  $ desca( rsrc_ ), desca( csrc_ ), iaseed, iia-1,
243  $ np, jja-1, jb, myrow, mycol, nprow, npcol )
244  CALL pdlaset( 'Lower', max( 0, n-2 ), jb, zero, zero, work,
245  $ min( iw+2, n ), 1, descw )
246  CALL dmatadd( np, jb, -one, work, ldw, one, a( ioffa ), lda )
247  jja = jja + jb
248  ioffa = ioffa + jb*lda
249  END IF
250 *
251  iw = iw + desca( mb_ )
252  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
253 *
254  DO 30 j = jn + 1, ja + n - 1, desca( nb_ )
255  jb = min( ja+n-j, desca( nb_ ) )
256 *
257  IF( mycol.EQ.descw( csrc_ ) ) THEN
258  CALL pdmatgen( ictxt, 'N', 'N', desca( m_ ), desca( n_ ),
259  $ desca( mb_ ), desca( nb_ ), work, ldw,
260  $ desca( rsrc_ ), desca( csrc_ ), iaseed,
261  $ iia-1, np, jja-1, jb, myrow, mycol, nprow,
262  $ npcol )
263  CALL pdlaset( 'Lower', max( 0, n-iw-1 ), jb, zero, zero,
264  $ work, min( n, iw+2 ), 1, descw )
265  CALL dmatadd( np, jb, -one, work, ldw, one, a( ioffa ),
266  $ lda )
267  jja = jja + jb
268  ioffa = ioffa + jb*lda
269  END IF
270  iw = iw + desca( mb_ )
271  descw( csrc_ ) = mod( descw( csrc_ )+1, npcol )
272  30 CONTINUE
273 *
274 * Calculate factor residual
275 *
276  fresid = pdlange( 'I', n, n, a, ia, ja, desca, work ) /
277  $ ( n*eps*anorm )
278 *
279  RETURN
280 *
281 * End PDNEPFCHK
282 *
283  END
dmatadd
subroutine dmatadd(M, N, ALPHA, A, LDA, BETA, C, LDC)
Definition: dmatadd.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdnepfchk
subroutine pdnepfchk(N, A, IA, JA, DESCA, IASEED, Z, IZ, JZ, DESCZ, ANORM, FRESID, WORK)
Definition: pdnepfchk.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
pdmatgen
subroutine pdmatgen(ICTXT, AFORM, DIAG, M, N, MB, NB, A, LDA, IAROW, IACOL, ISEED, IROFF, IRNUM, ICOFF, ICNUM, MYROW, MYCOL, NPROW, NPCOL)
Definition: pdmatgen.f:4
pdlaset
subroutine pdlaset(UPLO, M, N, ALPHA, BETA, A, IA, JA, DESCA)
Definition: pdblastst.f:6862
pdlacpy
subroutine pdlacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pdlacpy.f:3
min
#define min(A, B)
Definition: pcgemr.c:181