ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pdgerfs.f
Go to the documentation of this file.
1  SUBROUTINE pdgerfs( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF,
2  $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX,
3  $ JX, DESCX, FERR, BERR, WORK, LWORK, IWORK,
4  $ LIWORK, INFO )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * November 15, 1997
10 *
11 * .. Scalar Arguments ..
12  CHARACTER TRANS
13  INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX,
14  $ LIWORK, LWORK, N, NRHS
15 * ..
16 * .. Array Arguments ..
17  INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
18  $ DESCX( * ),IPIV( * ), IWORK( * )
19  DOUBLE PRECISION A( * ), AF( * ), B( * ), BERR( * ), FERR( * ),
20  $ work( * ), x( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * PDGERFS improves the computed solution to a system of linear
27 * equations and provides error bounds and backward error estimates for
28 * the solutions.
29 *
30 * Notes
31 * =====
32 *
33 * Each global data object is described by an associated description
34 * vector. This vector stores the information required to establish
35 * the mapping between an object element and its corresponding process
36 * and memory location.
37 *
38 * Let A be a generic term for any 2D block cyclicly distributed array.
39 * Such a global array has an associated description vector DESCA.
40 * In the following comments, the character _ should be read as
41 * "of the global array".
42 *
43 * NOTATION STORED IN EXPLANATION
44 * --------------- -------------- --------------------------------------
45 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46 * DTYPE_A = 1.
47 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48 * the BLACS process grid A is distribu-
49 * ted over. The context itself is glo-
50 * bal, but the handle (the integer
51 * value) may vary.
52 * M_A (global) DESCA( M_ ) The number of rows in the global
53 * array A.
54 * N_A (global) DESCA( N_ ) The number of columns in the global
55 * array A.
56 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57 * the rows of the array.
58 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59 * the columns of the array.
60 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61 * row of the array A is distributed.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of the array A is
64 * distributed.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array. LLD_A >= MAX(1,LOCr(M_A)).
67 *
68 * Let K be the number of rows or columns of a distributed matrix,
69 * and assume that its process grid has dimension p x q.
70 * LOCr( K ) denotes the number of elements of K that a process
71 * would receive if K were distributed over the p processes of its
72 * process column.
73 * Similarly, LOCc( K ) denotes the number of elements of K that a
74 * process would receive if K were distributed over the q processes of
75 * its process row.
76 * The values of LOCr() and LOCc() may be determined via a call to the
77 * ScaLAPACK tool function, NUMROC:
78 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80 * An upper bound for these quantities may be computed by:
81 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83 *
84 * In the following comments, sub( A ), sub( X ) and sub( B ) denote
85 * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86 * B(IB:IB+N-1,JB:JB+NRHS-1).
87 *
88 * Arguments
89 * =========
90 *
91 * TRANS (global input) CHARACTER*1
92 * Specifies the form of the system of equations.
93 * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose)
94 * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose)
95 * = 'C': sub( A )**T * sub( X ) = sub( B )
96 * (Conjugate transpose = Transpose)
97 *
98 *
99 * N (global input) INTEGER
100 * The order of the matrix sub( A ). N >= 0.
101 *
102 * NRHS (global input) INTEGER
103 * The number of right hand sides, i.e., the number of columns
104 * of the matrices sub( B ) and sub( X ). NRHS >= 0.
105 *
106 * A (local input) DOUBLE PRECISION pointer into the local
107 * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
108 * This array contains the local pieces of the distributed
109 * matrix sub( A ).
110 *
111 * IA (global input) INTEGER
112 * The row index in the global array A indicating the first
113 * row of sub( A ).
114 *
115 * JA (global input) INTEGER
116 * The column index in the global array A indicating the
117 * first column of sub( A ).
118 *
119 * DESCA (global and local input) INTEGER array of dimension DLEN_.
120 * The array descriptor for the distributed matrix A.
121 *
122 * AF (local input) DOUBLE PRECISION pointer into the local
123 * memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
124 * This array contains the local pieces of the distributed
125 * factors of the matrix sub( A ) = P * L * U as computed by
126 * PDGETRF.
127 *
128 * IAF (global input) INTEGER
129 * The row index in the global array AF indicating the first
130 * row of sub( AF ).
131 *
132 * JAF (global input) INTEGER
133 * The column index in the global array AF indicating the
134 * first column of sub( AF ).
135 *
136 * DESCAF (global and local input) INTEGER array of dimension DLEN_.
137 * The array descriptor for the distributed matrix AF.
138 *
139 * IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF.
140 * This array contains the pivoting information as computed
141 * by PDGETRF. IPIV(i) -> The global row local row i
142 * was swapped with. This array is tied to the distributed
143 * matrix A.
144 *
145 * B (local input) DOUBLE PRECISION pointer into the local
146 * memory to an array of local dimension
147 * (LLD_B,LOCc(JB+NRHS-1)). This array contains the local
148 * pieces of the distributed matrix of right hand sides
149 * sub( B ).
150 *
151 * IB (global input) INTEGER
152 * The row index in the global array B indicating the first
153 * row of sub( B ).
154 *
155 * JB (global input) INTEGER
156 * The column index in the global array B indicating the
157 * first column of sub( B ).
158 *
159 * DESCB (global and local input) INTEGER array of dimension DLEN_.
160 * The array descriptor for the distributed matrix B.
161 *
162 * X (local input and output) DOUBLE PRECISION pointer into the
163 * local memory to an array of local dimension
164 * (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains
165 * the local pieces of the distributed matrix solution
166 * sub( X ). On exit, the improved solution vectors.
167 *
168 * IX (global input) INTEGER
169 * The row index in the global array X indicating the first
170 * row of sub( X ).
171 *
172 * JX (global input) INTEGER
173 * The column index in the global array X indicating the
174 * first column of sub( X ).
175 *
176 * DESCX (global and local input) INTEGER array of dimension DLEN_.
177 * The array descriptor for the distributed matrix X.
178 *
179 * FERR (local output) DOUBLE PRECISION array of local dimension
180 * LOCc(JB+NRHS-1).
181 * The estimated forward error bound for each solution vector
182 * of sub( X ). If XTRUE is the true solution corresponding
183 * to sub( X ), FERR is an estimated upper bound for the
184 * magnitude of the largest element in (sub( X ) - XTRUE)
185 * divided by the magnitude of the largest element in sub( X ).
186 * The estimate is as reliable as the estimate for RCOND, and
187 * is almost always a slight overestimate of the true error.
188 * This array is tied to the distributed matrix X.
189 *
190 * BERR (local output) DOUBLE PRECISION array of local dimension
191 * LOCc(JB+NRHS-1). The componentwise relative backward
192 * error of each solution vector (i.e., the smallest re-
193 * lative change in any entry of sub( A ) or sub( B )
194 * that makes sub( X ) an exact solution).
195 * This array is tied to the distributed matrix X.
196 *
197 * WORK (local workspace/local output) DOUBLE PRECISION array,
198 * dimension (LWORK)
199 * On exit, WORK(1) returns the minimal and optimal LWORK.
200 *
201 * LWORK (local or global input) INTEGER
202 * The dimension of the array WORK.
203 * LWORK is local input and must be at least
204 * LWORK >= 3*LOCr( N + MOD(IA-1,MB_A) )
205 *
206 * If LWORK = -1, then LWORK is global input and a workspace
207 * query is assumed; the routine only calculates the minimum
208 * and optimal size for all work arrays. Each of these
209 * values is returned in the first entry of the corresponding
210 * work array, and no error message is issued by PXERBLA.
211 *
212 * IWORK (local workspace/local output) INTEGER array,
213 * dimension (LIWORK)
214 * On exit, IWORK(1) returns the minimal and optimal LIWORK.
215 *
216 * LIWORK (local or global input) INTEGER
217 * The dimension of the array IWORK.
218 * LIWORK is local input and must be at least
219 * LIWORK >= LOCr( N + MOD(IB-1,MB_B) ).
220 *
221 * If LIWORK = -1, then LIWORK is global input and a workspace
222 * query is assumed; the routine only calculates the minimum
223 * and optimal size for all work arrays. Each of these
224 * values is returned in the first entry of the corresponding
225 * work array, and no error message is issued by PXERBLA.
226 *
227 *
228 * INFO (global output) INTEGER
229 * = 0: successful exit
230 * < 0: If the i-th argument is an array and the j-entry had
231 * an illegal value, then INFO = -(i*100+j), if the i-th
232 * argument is a scalar and had an illegal value, then
233 * INFO = -i.
234 *
235 * Internal Parameters
236 * ===================
237 *
238 * ITMAX is the maximum number of steps of iterative refinement.
239 *
240 * Notes
241 * =====
242 *
243 * This routine temporarily returns when N <= 1.
244 *
245 * The distributed submatrices op( A ) and op( AF ) (respectively
246 * sub( X ) and sub( B ) ) should be distributed the same way on the
247 * same processes. These conditions ensure that sub( A ) and sub( AF )
248 * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
249 *
250 * Moreover, this routine requires the distributed submatrices sub( A ),
251 * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
252 * i.e., if f(x,y) = MOD( x-1, y ):
253 * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
254 * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
255 * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
256 * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
257 *
258 * =====================================================================
259 *
260 * .. Parameters ..
261  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
262  $ LLD_, MB_, M_, NB_, N_, RSRC_
263  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
264  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
265  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
266  INTEGER ITMAX
267  PARAMETER ( ITMAX = 5 )
268  double precision zero, one
269  parameter( zero = 0.0d+0, one = 1.0d+0 )
270  DOUBLE PRECISION TWO, THREE
271  parameter( two = 2.0d+0, three = 3.0d+0 )
272 * ..
273 * .. Local Scalars ..
274  LOGICAL LQUERY, NOTRAN
275  CHARACTER TRANST
276  INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
277  $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
278  $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
279  $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
280  $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
281  $ k, kase, ldxb, liwmin, lwmin, mycol, myrhs,
282  $ myrow, np, np0, npcol, npmod, nprow, nz
283  DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
284 * ..
285 * .. Local Arrays ..
286  INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
287 * ..
288 * .. External Functions ..
289  LOGICAL LSAME
290  INTEGER ICEIL, INDXG2P, NUMROC
291  DOUBLE PRECISION PDLAMCH
292  EXTERNAL iceil, indxg2p, lsame, numroc, pdlamch
293 * ..
294 * .. External Subroutines ..
295  EXTERNAL blacs_gridinfo, chk1mat, descset, dgamx2d,
296  $ dgebr2d, dgebs2d, infog2l, pchk2mat,
297  $ pdagemv, pdaxpy, pdcopy, pdgemv,
299 * ..
300 * .. Intrinsic Functions ..
301  INTRINSIC abs, dble, ichar, max, min, mod
302 * ..
303 * .. Executable Statements ..
304 *
305 * Get grid parameters
306 *
307  ictxt = desca( ctxt_ )
308  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
309 *
310 * Test the input parameters.
311 *
312  notran = lsame( trans, 'N' )
313 *
314  info = 0
315  IF( nprow.EQ.-1 ) THEN
316  info = -(700+ctxt_)
317  ELSE
318  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
319  CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
320  CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 16, info )
321  CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 20, info )
322  IF( info.EQ.0 ) THEN
323  iroffa = mod( ia-1, desca( mb_ ) )
324  icoffa = mod( ja-1, desca( nb_ ) )
325  iroffaf = mod( iaf-1, descaf( mb_ ) )
326  icoffaf = mod( jaf-1, descaf( nb_ ) )
327  iroffb = mod( ib-1, descb( mb_ ) )
328  icoffb = mod( jb-1, descb( nb_ ) )
329  iroffx = mod( ix-1, descx( mb_ ) )
330  icoffx = mod( jx-1, descx( nb_ ) )
331  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
332  $ nprow )
333  iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
334  $ descaf( csrc_ ), npcol )
335  iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
336  $ descaf( rsrc_ ), nprow )
337  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
338  $ npcol )
339  CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
340  $ iixb, jjxb, ixbrow, ixbcol )
341  ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
342  $ nprow )
343  ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
344  $ npcol )
345  npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
346  $ nprow )
347  lwmin = 3 * npmod
348  liwmin = npmod
349  work( 1 ) = dble( lwmin )
350  iwork( 1 ) = liwmin
351  lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
352 *
353  IF( ( .NOT.notran ) .AND. ( .NOT.lsame( trans, 'T' ) ) .AND.
354  $ ( .NOT.lsame( trans, 'C' ) ) ) THEN
355  info = -1
356  ELSE IF( n.LT.0 ) THEN
357  info = -2
358  ELSE IF( nrhs.LT.0 ) THEN
359  info = -3
360  ELSE IF( iroffa.NE.0 ) THEN
361  info = -5
362  ELSE IF( icoffa.NE.0 ) THEN
363  info = -6
364  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
365  info = -( 700 + nb_ )
366  ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
367  info = -( 1100 + mb_ )
368  ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
369  info = -9
370  ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
371  info = -( 1100 + nb_ )
372  ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
373  info = -10
374  ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
375  info = -( 1100 + ctxt_ )
376  ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
377  info = -14
378  ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
379  info = -( 1600 + mb_ )
380  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
381  info = -( 1600 + ctxt_ )
382  ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
383  info = -( 2000 + mb_ )
384  ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
385  info = -18
386  ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
387  info = -( 2000 + nb_ )
388  ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
389  info = -19
390  ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
391  info = -( 2000 + ctxt_ )
392  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
393  info = -24
394  ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
395  info = -26
396  END IF
397  END IF
398 *
399  IF( notran ) THEN
400  idum1( 1 ) = ichar( 'N' )
401  ELSE IF( lsame( trans, 'T' ) ) THEN
402  idum1( 1 ) = ichar( 'T' )
403  ELSE
404  idum1( 1 ) = ichar( 'C' )
405  END IF
406  idum2( 1 ) = 1
407  idum1( 2 ) = n
408  idum2( 2 ) = 2
409  idum1( 3 ) = nrhs
410  idum2( 3 ) = 3
411  IF( lwork.EQ.-1 ) THEN
412  idum1( 4 ) = -1
413  ELSE
414  idum1( 4 ) = 1
415  END IF
416  idum2( 4 ) = 24
417  IF( liwork.EQ.-1 ) THEN
418  idum1( 5 ) = -1
419  ELSE
420  idum1( 5 ) = 1
421  END IF
422  idum2( 5 ) = 26
423  CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
424  $ jaf, descaf, 11, 5, idum1, idum2, info )
425  CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 16, n, 2, nrhs, 3,
426  $ ix, jx, descx, 20, 5, idum1, idum2, info )
427  END IF
428  IF( info.NE.0 ) THEN
429  CALL pxerbla( ictxt, 'PDGERFS', -info )
430  RETURN
431  ELSE IF( lquery ) THEN
432  RETURN
433  END IF
434 *
435  jjfbe = jjxb
436  myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
437  $ npcol )
438 *
439 * Quick return if possible
440 *
441  IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
442  DO 10 jj = jjfbe, myrhs
443  ferr( jj ) = zero
444  berr( jj ) = zero
445  10 CONTINUE
446  RETURN
447  END IF
448 *
449  IF( notran ) THEN
450  transt = 'T'
451  ELSE
452  transt = 'N'
453  END IF
454 *
455  np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
456  CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
457  $ ictxt, max( 1, np0 ) )
458  ipb = 1
459  ipr = ipb + np0
460  ipv = ipr + np0
461  IF( myrow.EQ.ixbrow ) THEN
462  iiw = 1 + iroffb
463  np = np0 - iroffb
464  ELSE
465  iiw = 1
466  np = np0
467  END IF
468  iw = 1 + iroffb
469  jw = 1
470  ldxb = descb( lld_ )
471  ioffxb = ( jjxb-1 )*ldxb
472 *
473 * NZ = 1 + maximum number of nonzero entries in each row of sub( A )
474 *
475  nz = n + 1
476  eps = pdlamch( ictxt, 'Epsilon' )
477  safmin = pdlamch( ictxt, 'Safe minimum' )
478  safe1 = nz*safmin
479  safe2 = safe1 / eps
480  jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
481 *
482 * Handle first block separately
483 *
484  jbrhs = jn - jb + 1
485  DO 100 k = 0, jbrhs-1
486 *
487  count = 1
488  lstres = three
489  20 CONTINUE
490 *
491 * Loop until stopping criterion is satisfied.
492 *
493 * Compute residual R = sub(B) - op(sub(A)) * sub(X),
494 * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
495 * depending on TRANS.
496 *
497  CALL pdcopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
498  $ descw, 1 )
499  CALL pdgemv( trans, n, n, -one, a, ia, ja, desca, x, ix,
500  $ jx+k, descx, 1, one, work( ipr ), iw, jw,
501  $ descw, 1 )
502 *
503 * Compute componentwise relative backward error from formula
504 *
505 * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
506 *
507 * where abs(Z) is the componentwise absolute value of the
508 * matrix or vector Z. If the i-th component of the
509 * denominator is less than SAFE2, then SAFE1 is added to the
510 * i-th components of the numerator and denominator before
511 * dividing.
512 *
513  IF( mycol.EQ.ixbcol ) THEN
514  IF( np.GT.0 ) THEN
515  DO 30 ii = iixb, iixb + np - 1
516  work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
517  30 CONTINUE
518  END IF
519  END IF
520 *
521  CALL pdagemv( trans, n, n, one, a, ia, ja, desca, x, ix, jx+k,
522  $ descx, 1, one, work( ipb ), iw, jw, descw, 1 )
523 *
524  s = zero
525  IF( mycol.EQ.ixbcol ) THEN
526  IF( np.GT.0 ) THEN
527  DO 40 ii = iiw-1, iiw+np-2
528  IF( work( ipb+ii ).GT.safe2 ) THEN
529  s = max( s, abs( work( ipr+ii ) ) /
530  $ work( ipb+ii ) )
531  ELSE
532  s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
533  $ ( work( ipb+ii )+safe1 ) )
534  END IF
535  40 CONTINUE
536  END IF
537  END IF
538 *
539  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
540  $ -1, mycol )
541  IF( mycol.EQ.ixbcol )
542  $ berr( jjfbe ) = s
543 *
544 * Test stopping criterion. Continue iterating if
545 * 1) The residual BERR(J+K) is larger than machine epsilon,
546 * and
547 * 2) BERR(J+K) decreased by at least a factor of 2 during the
548 * last iteration, and
549 * 3) At most ITMAX iterations tried.
550 *
551  IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
552 *
553 * Update solution and try again.
554 *
555  CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
556  $ work( ipr ), iw, jw, descw, info )
557  CALL pdaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
558  $ jx+k, descx, 1 )
559  lstres = s
560  count = count + 1
561  GO TO 20
562  END IF
563 *
564 * Bound error from formula
565 *
566 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
567 * norm( abs(inv(op(sub(A))))*
568 * ( abs(R) + NZ*EPS*(
569 * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
570 *
571 * where
572 * norm(Z) is the magnitude of the largest component of Z
573 * inv(op(sub(A))) is the inverse of op(sub(A))
574 * abs(Z) is the componentwise absolute value of the matrix
575 * or vector Z
576 * NZ is the maximum number of nonzeros in any row of sub(A),
577 * plus 1
578 * EPS is machine epsilon
579 *
580 * The i-th component of
581 * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
582 * is incremented by SAFE1 if the i-th component of
583 * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
584 * SAFE2.
585 *
586 * Use PDLACON to estimate the infinity-norm of the matrix
587 * inv(op(sub(A))) * diag(W), where
588 * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
589 *
590  IF( mycol.EQ.ixbcol ) THEN
591  IF( np.GT.0 ) THEN
592  DO 50 ii = iiw-1, iiw+np-2
593  IF( work( ipb+ii ).GT.safe2 ) THEN
594  work( ipb+ii ) = abs( work( ipr+ii ) ) +
595  $ nz*eps*work( ipb+ii )
596  ELSE
597  work( ipb+ii ) = abs( work( ipr+ii ) ) +
598  $ nz*eps*work( ipb+ii ) + safe1
599  END IF
600  50 CONTINUE
601  END IF
602  END IF
603 *
604  kase = 0
605  60 CONTINUE
606  IF( mycol.EQ.ixbcol ) THEN
607  CALL dgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
608  $ descw( lld_ ) )
609  ELSE
610  CALL dgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
611  $ descw( lld_ ), myrow, ixbcol )
612  END IF
613  descw( csrc_ ) = mycol
614  CALL pdlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
615  $ iw, jw, descw, iwork, est, kase )
616  descw( csrc_ ) = ixbcol
617 *
618  IF( kase.NE.0 ) THEN
619  IF( kase.EQ.1 ) THEN
620 *
621 * Multiply by diag(W)*inv(op(sub(A))').
622 *
623  CALL pdgetrs( transt, n, 1, af, iaf, jaf, descaf,
624  $ ipiv, work( ipr ), iw, jw, descw, info )
625 *
626  IF( mycol.EQ.ixbcol ) THEN
627  IF( np.GT.0 ) THEN
628  DO 70 ii = iiw-1, iiw+np-2
629  work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
630  70 CONTINUE
631  END IF
632  END IF
633  ELSE
634 *
635 * Multiply by inv(op(sub(A)))*diag(W).
636 *
637  IF( mycol.EQ.ixbcol ) THEN
638  IF( np.GT.0 ) THEN
639  DO 80 ii = iiw-1, iiw+np-2
640  work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
641  80 CONTINUE
642  END IF
643  END IF
644 *
645  CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
646  $ work( ipr ), iw, jw, descw, info )
647  END IF
648  GO TO 60
649  END IF
650 *
651 * Normalize error.
652 *
653  lstres = zero
654  IF( mycol.EQ.ixbcol ) THEN
655  IF( np.GT.0 ) THEN
656  DO 90 ii = iixb, iixb+np-1
657  lstres = max( lstres, abs( x( ioffxb+ii ) ) )
658  90 CONTINUE
659  END IF
660  CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
661  $ idum, 1, -1, mycol )
662  IF( lstres.NE.zero )
663  $ ferr( jjfbe ) = est / lstres
664 *
665  jjxb = jjxb + 1
666  jjfbe = jjfbe + 1
667  ioffxb = ioffxb + ldxb
668 *
669  END IF
670 *
671  100 CONTINUE
672 *
673  icurcol = mod( ixbcol+1, npcol )
674 *
675 * Do for each right hand side
676 *
677  DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
678  jbrhs = min( jb+nrhs-j, descb( nb_ ) )
679  descw( csrc_ ) = icurcol
680 *
681  DO 190 k = 0, jbrhs-1
682 *
683  count = 1
684  lstres = three
685  110 CONTINUE
686 *
687 * Loop until stopping criterion is satisfied.
688 *
689 * Compute residual R = sub(B) - op(sub(A)) * sub(X),
690 * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
691 * depending on TRANS.
692 *
693  CALL pdcopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
694  $ descw, 1 )
695  CALL pdgemv( trans, n, n, -one, a, ia, ja, desca, x,
696  $ ix, j+k, descx, 1, one, work( ipr ), iw, jw,
697  $ descw, 1 )
698 *
699 * Compute componentwise relative backward error from formula
700 *
701 * max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) +
702 * abs(sub(B)))(i))
703 *
704 * where abs(Z) is the componentwise absolute value of the
705 * matrix or vector Z. If the i-th component of the
706 * denominator is less than SAFE2, then SAFE1 is added to the
707 * i-th components of the numerator and denominator before
708 * dividing.
709 *
710  IF( mycol.EQ.icurcol ) THEN
711  IF( np.GT.0 ) THEN
712  DO 120 ii = iixb, iixb+np-1
713  work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
714  120 CONTINUE
715  END IF
716  END IF
717 *
718  CALL pdagemv( trans, n, n, one, a, ia, ja, desca, x, ix,
719  $ j+k, descx, 1, one, work( ipb ), iw, jw,
720  $ descw, 1 )
721 *
722  s = zero
723  IF( mycol.EQ.icurcol ) THEN
724  IF( np.GT.0 )THEN
725  DO 130 ii = iiw-1, iiw+np-2
726  IF( work( ipb+ii ).GT.safe2 ) THEN
727  s = max( s, abs( work( ipr+ii ) ) /
728  $ work( ipb+ii ) )
729  ELSE
730  s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
731  $ ( work( ipb+ii )+safe1 ) )
732  END IF
733  130 CONTINUE
734  END IF
735  END IF
736 *
737  CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
738  $ -1, mycol )
739  IF( mycol.EQ.icurcol )
740  $ berr( jjfbe ) = s
741 *
742 * Test stopping criterion. Continue iterating if
743 * 1) The residual BERR(J+K) is larger than machine epsilon,
744 * and
745 * 2) BERR(J+K) decreased by at least a factor of 2 during
746 * the last iteration, and
747 * 3) At most ITMAX iterations tried.
748 *
749  IF( s.GT.eps .AND. two*s.LE.lstres .AND.
750  $ count.LE.itmax ) THEN
751 *
752 * Update solution and try again.
753 *
754  CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
755  $ work( ipr ), iw, jw, descw, info )
756  CALL pdaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
757  $ ix, j+k, descx, 1 )
758  lstres = s
759  count = count + 1
760  GO TO 110
761  END IF
762 *
763 * Bound error from formula
764 *
765 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
766 * norm( abs(inv(op(sub(A))))*
767 * ( abs(R) + NZ*EPS*(
768 * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
769 *
770 * where
771 * norm(Z) is the magnitude of the largest component of Z
772 * inv(op(sub(A))) is the inverse of op(sub(A))
773 * abs(Z) is the componentwise absolute value of the matrix
774 * or vector Z
775 * NZ is the maximum number of nonzeros in any row of sub(A),
776 * plus 1
777 * EPS is machine epsilon
778 *
779 * The i-th component of
780 * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
781 * is incremented by SAFE1 if the i-th component of
782 * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
783 * SAFE2.
784 *
785 * Use PDLACON to estimate the infinity-norm of the matrix
786 * inv(op(sub(A))) * diag(W), where
787 * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
788 *
789  IF( mycol.EQ.icurcol ) THEN
790  IF( np.GT.0 ) THEN
791  DO 140 ii = iiw-1, iiw+np-2
792  IF( work( ipb+ii ).GT.safe2 ) THEN
793  work( ipb+ii ) = abs( work( ipr+ii ) ) +
794  $ nz*eps*work( ipb+ii )
795  ELSE
796  work( ipb+ii ) = abs( work( ipr+ii ) ) +
797  $ nz*eps*work( ipb+ii ) + safe1
798  END IF
799  140 CONTINUE
800  END IF
801  END IF
802 *
803  kase = 0
804  150 CONTINUE
805  IF( mycol.EQ.icurcol ) THEN
806  CALL dgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
807  $ descw( lld_ ) )
808  ELSE
809  CALL dgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
810  $ descw( lld_ ), myrow, icurcol )
811  END IF
812  descw( csrc_ ) = mycol
813  CALL pdlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
814  $ iw, jw, descw, iwork, est, kase )
815  descw( csrc_ ) = icurcol
816 *
817  IF( kase.NE.0 ) THEN
818  IF( kase.EQ.1 ) THEN
819 *
820 * Multiply by diag(W)*inv(op(sub(A))').
821 *
822  CALL pdgetrs( transt, n, 1, af, iaf, jaf, descaf,
823  $ ipiv, work( ipr ), iw, jw, descw, info )
824 *
825  IF( mycol.EQ.icurcol ) THEN
826  IF( np.GT.0 ) THEN
827  DO 160 ii = iiw-1, iiw+np-2
828  work( ipr+ii ) = work( ipb+ii )*
829  $ work( ipr+ii )
830  160 CONTINUE
831  END IF
832  END IF
833  ELSE
834 *
835 * Multiply by inv(op(sub(A)))*diag(W).
836 *
837  IF( mycol.EQ.icurcol ) THEN
838  IF( np.GT.0 ) THEN
839  DO 170 ii = iiw-1, iiw+np-2
840  work( ipr+ii ) = work( ipb+ii )*
841  $ work( ipr+ii )
842  170 CONTINUE
843  END IF
844  END IF
845 *
846  CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf,
847  $ ipiv, work( ipr ), iw, jw, descw,
848  $ info )
849  END IF
850  GO TO 150
851  END IF
852 *
853 * Normalize error.
854 *
855  lstres = zero
856  IF( mycol.EQ.icurcol ) THEN
857  IF( np.GT.0 ) THEN
858  DO 180 ii = iixb, iixb+np-1
859  lstres = max( lstres, abs( x( ioffxb+ii ) ) )
860  180 CONTINUE
861  END IF
862  CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres,
863  $ 1, idum, idum, 1, -1, mycol )
864  IF( lstres.NE.zero )
865  $ ferr( jjfbe ) = est / lstres
866 *
867  jjxb = jjxb + 1
868  jjfbe = jjfbe + 1
869  ioffxb = ioffxb + ldxb
870 *
871  END IF
872 *
873  190 CONTINUE
874 *
875  icurcol = mod( icurcol+1, npcol )
876 *
877  200 CONTINUE
878 *
879  work( 1 ) = dble( lwmin )
880  iwork( 1 ) = liwmin
881 *
882  RETURN
883 *
884 * End of PDGERFS
885 *
886  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pdgerfs
subroutine pdgerfs(TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, IWORK, LIWORK, INFO)
Definition: pdgerfs.f:5
pdlacon
subroutine pdlacon(N, V, IV, JV, DESCV, X, IX, JX, DESCX, ISGN, EST, KASE)
Definition: pdlacon.f:3
pdgetrs
subroutine pdgetrs(TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO)
Definition: pdgetrs.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181