ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pcgerfs.f
Go to the documentation of this file.
1  SUBROUTINE pcgerfs( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF,
2  $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX,
3  $ JX, DESCX, FERR, BERR, WORK, LWORK, RWORK,
4  $ LRWORK, INFO )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * November 15, 1997
10 *
11 * .. Scalar Arguments ..
12  CHARACTER TRANS
13  INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX,
14  $ LRWORK, LWORK, N, NRHS
15 * ..
16 * .. Array Arguments ..
17  INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
18  $ DESCX( * ), IPIV( * )
19  REAL BERR( * ), FERR( * ), RWORK( * )
20  COMPLEX A( * ), AF( * ), B( * ), WORK( * ), X( * )
21 * ..
22 *
23 * Purpose
24 * =======
25 *
26 * PCGERFS improves the computed solution to a system of linear
27 * equations and provides error bounds and backward error estimates for
28 * the solutions.
29 *
30 * Notes
31 * =====
32 *
33 * Each global data object is described by an associated description
34 * vector. This vector stores the information required to establish
35 * the mapping between an object element and its corresponding process
36 * and memory location.
37 *
38 * Let A be a generic term for any 2D block cyclicly distributed array.
39 * Such a global array has an associated description vector DESCA.
40 * In the following comments, the character _ should be read as
41 * "of the global array".
42 *
43 * NOTATION STORED IN EXPLANATION
44 * --------------- -------------- --------------------------------------
45 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46 * DTYPE_A = 1.
47 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48 * the BLACS process grid A is distribu-
49 * ted over. The context itself is glo-
50 * bal, but the handle (the integer
51 * value) may vary.
52 * M_A (global) DESCA( M_ ) The number of rows in the global
53 * array A.
54 * N_A (global) DESCA( N_ ) The number of columns in the global
55 * array A.
56 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57 * the rows of the array.
58 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59 * the columns of the array.
60 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61 * row of the array A is distributed.
62 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63 * first column of the array A is
64 * distributed.
65 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66 * array. LLD_A >= MAX(1,LOCr(M_A)).
67 *
68 * Let K be the number of rows or columns of a distributed matrix,
69 * and assume that its process grid has dimension p x q.
70 * LOCr( K ) denotes the number of elements of K that a process
71 * would receive if K were distributed over the p processes of its
72 * process column.
73 * Similarly, LOCc( K ) denotes the number of elements of K that a
74 * process would receive if K were distributed over the q processes of
75 * its process row.
76 * The values of LOCr() and LOCc() may be determined via a call to the
77 * ScaLAPACK tool function, NUMROC:
78 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80 * An upper bound for these quantities may be computed by:
81 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83 *
84 * In the following comments, sub( A ), sub( X ) and sub( B ) denote
85 * respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86 * B(IB:IB+N-1,JB:JB+NRHS-1).
87 *
88 * Arguments
89 * =========
90 *
91 * TRANS (global input) CHARACTER*1
92 * Specifies the form of the system of equations.
93 * = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose)
94 * = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose)
95 * = 'C': sub( A )**H * sub( X ) = sub( B )
96 * (Conjugate transpose)
97 *
98 * N (global input) INTEGER
99 * The order of the matrix sub( A ). N >= 0.
100 *
101 * NRHS (global input) INTEGER
102 * The number of right hand sides, i.e., the number of columns
103 * of the matrices sub( B ) and sub( X ). NRHS >= 0.
104 *
105 * A (local input) COMPLEX pointer into the local
106 * memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
107 * This array contains the local pieces of the distributed
108 * matrix sub( A ).
109 *
110 * IA (global input) INTEGER
111 * The row index in the global array A indicating the first
112 * row of sub( A ).
113 *
114 * JA (global input) INTEGER
115 * The column index in the global array A indicating the
116 * first column of sub( A ).
117 *
118 * DESCA (global and local input) INTEGER array of dimension DLEN_.
119 * The array descriptor for the distributed matrix A.
120 *
121 * AF (local input) COMPLEX pointer into the local
122 * memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
123 * This array contains the local pieces of the distributed
124 * factors of the matrix sub( A ) = P * L * U as computed by
125 * PCGETRF.
126 *
127 * IAF (global input) INTEGER
128 * The row index in the global array AF indicating the first
129 * row of sub( AF ).
130 *
131 * JAF (global input) INTEGER
132 * The column index in the global array AF indicating the
133 * first column of sub( AF ).
134 *
135 * DESCAF (global and local input) INTEGER array of dimension DLEN_.
136 * The array descriptor for the distributed matrix AF.
137 *
138 * IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF.
139 * This array contains the pivoting information as computed
140 * by PCGETRF. IPIV(i) -> The global row local row i
141 * was swapped with. This array is tied to the distributed
142 * matrix A.
143 *
144 * B (local input) COMPLEX pointer into the local
145 * memory to an array of local dimension
146 * (LLD_B,LOCc(JB+NRHS-1)). This array contains the local
147 * pieces of the distributed matrix of right hand sides
148 * sub( B ).
149 *
150 * IB (global input) INTEGER
151 * The row index in the global array B indicating the first
152 * row of sub( B ).
153 *
154 * JB (global input) INTEGER
155 * The column index in the global array B indicating the
156 * first column of sub( B ).
157 *
158 * DESCB (global and local input) INTEGER array of dimension DLEN_.
159 * The array descriptor for the distributed matrix B.
160 *
161 * X (local input and output) COMPLEX pointer into the
162 * local memory to an array of local dimension
163 * (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains
164 * the local pieces of the distributed matrix solution
165 * sub( X ). On exit, the improved solution vectors.
166 *
167 * IX (global input) INTEGER
168 * The row index in the global array X indicating the first
169 * row of sub( X ).
170 *
171 * JX (global input) INTEGER
172 * The column index in the global array X indicating the
173 * first column of sub( X ).
174 *
175 * DESCX (global and local input) INTEGER array of dimension DLEN_.
176 * The array descriptor for the distributed matrix X.
177 *
178 * FERR (local output) REAL array of local dimension
179 * LOCc(JB+NRHS-1).
180 * The estimated forward error bound for each solution vector
181 * of sub( X ). If XTRUE is the true solution corresponding
182 * to sub( X ), FERR is an estimated upper bound for the
183 * magnitude of the largest element in (sub( X ) - XTRUE)
184 * divided by the magnitude of the largest element in sub( X ).
185 * The estimate is as reliable as the estimate for RCOND, and
186 * is almost always a slight overestimate of the true error.
187 * This array is tied to the distributed matrix X.
188 *
189 * BERR (local output) REAL array of local dimension
190 * LOCc(JB+NRHS-1). The componentwise relative backward
191 * error of each solution vector (i.e., the smallest re-
192 * lative change in any entry of sub( A ) or sub( B )
193 * that makes sub( X ) an exact solution).
194 * This array is tied to the distributed matrix X.
195 *
196 * WORK (local workspace/local output) COMPLEX array,
197 * dimension (LWORK)
198 * On exit, WORK(1) returns the minimal and optimal LWORK.
199 *
200 * LWORK (local or global input) INTEGER
201 * The dimension of the array WORK.
202 * LWORK is local input and must be at least
203 * LWORK >= 2*LOCr( N + MOD(IA-1,MB_A) )
204 *
205 * If LWORK = -1, then LWORK is global input and a workspace
206 * query is assumed; the routine only calculates the minimum
207 * and optimal size for all work arrays. Each of these
208 * values is returned in the first entry of the corresponding
209 * work array, and no error message is issued by PXERBLA.
210 *
211 * RWORK (local workspace/local output) REAL array,
212 * dimension (LRWORK)
213 * On exit, RWORK(1) returns the minimal and optimal LRWORK.
214 *
215 * LRWORK (local or global input) INTEGER
216 * The dimension of the array RWORK.
217 * LRWORK is local input and must be at least
218 * LRWORK >= LOCr( N + MOD(IB-1,MB_B) ).
219 *
220 * If LRWORK = -1, then LRWORK is global input and a workspace
221 * query is assumed; the routine only calculates the minimum
222 * and optimal size for all work arrays. Each of these
223 * values is returned in the first entry of the corresponding
224 * work array, and no error message is issued by PXERBLA.
225 *
226 *
227 * INFO (global output) INTEGER
228 * = 0: successful exit
229 * < 0: If the i-th argument is an array and the j-entry had
230 * an illegal value, then INFO = -(i*100+j), if the i-th
231 * argument is a scalar and had an illegal value, then
232 * INFO = -i.
233 *
234 * Internal Parameters
235 * ===================
236 *
237 * ITMAX is the maximum number of steps of iterative refinement.
238 *
239 * Notes
240 * =====
241 *
242 * This routine temporarily returns when N <= 1.
243 *
244 * The distributed submatrices op( A ) and op( AF ) (respectively
245 * sub( X ) and sub( B ) ) should be distributed the same way on the
246 * same processes. These conditions ensure that sub( A ) and sub( AF )
247 * (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
248 *
249 * Moreover, this routine requires the distributed submatrices sub( A ),
250 * sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
251 * i.e., if f(x,y) = MOD( x-1, y ):
252 * f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
253 * f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
254 * f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
255 * f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
256 *
257 * =====================================================================
258 *
259 * .. Parameters ..
260  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
261  $ LLD_, MB_, M_, NB_, N_, RSRC_
262  PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
263  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
264  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
265  INTEGER ITMAX
266  PARAMETER ( ITMAX = 5 )
267  real zero, rone, two, three
268  parameter( zero = 0.0e+0, rone = 1.0e+0, two = 2.0e+0,
269  $ three = 3.0e+0 )
270  COMPLEX ONE
271  parameter( one = ( 1.0e+0, 0.0e+0 ) )
272 * ..
273 * .. Local Scalars ..
274  LOGICAL LQUERY, NOTRAN
275  CHARACTER TRANSN, TRANST
276  INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
277  $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
278  $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
279  $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
280  $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
281  $ k, kase, ldxb, lrwmin, lwmin, mycol, myrhs,
282  $ myrow, np, np0, npcol, npmod, nprow, nz
283  REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
284  COMPLEX ZDUM
285 * ..
286 * .. Local Arrays ..
287  INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
288 * ..
289 * .. External Functions ..
290  LOGICAL LSAME
291  INTEGER ICEIL, INDXG2P, NUMROC
292  REAL PSLAMCH
293  EXTERNAL iceil, indxg2p, lsame, numroc, pslamch
294 * ..
295 * .. External Subroutines ..
296  EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, chk1mat,
297  $ descset, infog2l, pcagemv, pcaxpy, pchk2mat,
298  $ pccopy, pcgemv, pcgetrs, pclacon,
299  $ pxerbla, sgamx2d
300 * ..
301 * .. Intrinsic Functions ..
302  INTRINSIC abs, aimag, cmplx, ichar, max, min, mod, real
303 * ..
304 * .. Statement Functions ..
305  REAL CABS1
306 * ..
307 * .. Statement Function definitions ..
308  cabs1( zdum ) = abs( real( zdum ) ) + abs( aimag( zdum ) )
309 * ..
310 * .. Executable Statements ..
311 *
312 * Get grid parameters
313 *
314  ictxt = desca( ctxt_ )
315  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
316 *
317 * Test the input parameters.
318 *
319  notran = lsame( trans, 'N' )
320 *
321  info = 0
322  IF( nprow.EQ.-1 ) THEN
323  info = -(700+ctxt_)
324  ELSE
325  CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
326  CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
327  CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 16, info )
328  CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 20, info )
329  IF( info.EQ.0 ) THEN
330  iroffa = mod( ia-1, desca( mb_ ) )
331  icoffa = mod( ja-1, desca( nb_ ) )
332  iroffaf = mod( iaf-1, descaf( mb_ ) )
333  icoffaf = mod( jaf-1, descaf( nb_ ) )
334  iroffb = mod( ib-1, descb( mb_ ) )
335  icoffb = mod( jb-1, descb( nb_ ) )
336  iroffx = mod( ix-1, descx( mb_ ) )
337  icoffx = mod( jx-1, descx( nb_ ) )
338  iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
339  $ nprow )
340  iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
341  $ descaf( csrc_ ), npcol )
342  iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
343  $ descaf( rsrc_ ), nprow )
344  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
345  $ npcol )
346  CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
347  $ iixb, jjxb, ixbrow, ixbcol )
348  ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
349  $ nprow )
350  ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
351  $ npcol )
352  npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
353  $ nprow )
354  lwmin = 2 * npmod
355  lrwmin = npmod
356  work( 1 ) = cmplx( real( lwmin ) )
357  rwork( 1 ) = real( lrwmin )
358  lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 )
359 *
360  IF( ( .NOT.notran ) .AND. ( .NOT.lsame( trans, 'T' ) ) .AND.
361  $ ( .NOT.lsame( trans, 'C' ) ) ) THEN
362  info = -1
363  ELSE IF( n.LT.0 ) THEN
364  info = -2
365  ELSE IF( nrhs.LT.0 ) THEN
366  info = -3
367  ELSE IF( iroffa.NE.0 ) THEN
368  info = -5
369  ELSE IF( icoffa.NE.0 ) THEN
370  info = -6
371  ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
372  info = -( 700 + nb_ )
373  ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
374  info = -( 1100 + mb_ )
375  ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
376  info = -9
377  ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
378  info = -( 1100 + nb_ )
379  ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
380  info = -10
381  ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
382  info = -( 1100 + ctxt_ )
383  ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
384  info = -14
385  ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
386  info = -( 1600 + mb_ )
387  ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
388  info = -( 1600 + ctxt_ )
389  ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
390  info = -( 2000 + mb_ )
391  ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
392  info = -18
393  ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
394  info = -( 2000 + nb_ )
395  ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
396  info = -19
397  ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
398  info = -( 2000 + ctxt_ )
399  ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
400  info = -24
401  ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
402  info = -26
403  END IF
404  END IF
405 *
406  IF( notran ) THEN
407  idum1( 1 ) = ichar( 'N' )
408  ELSE IF( lsame( trans, 'T' ) ) THEN
409  idum1( 1 ) = ichar( 'T' )
410  ELSE
411  idum1( 1 ) = ichar( 'C' )
412  END IF
413  idum2( 1 ) = 1
414  idum1( 2 ) = n
415  idum2( 2 ) = 2
416  idum1( 3 ) = nrhs
417  idum2( 3 ) = 3
418  IF( lwork.EQ.-1 ) THEN
419  idum1( 4 ) = -1
420  ELSE
421  idum1( 4 ) = 1
422  END IF
423  idum2( 4 ) = 24
424  IF( lrwork.EQ.-1 ) THEN
425  idum1( 5 ) = -1
426  ELSE
427  idum1( 5 ) = 1
428  END IF
429  idum2( 5 ) = 26
430  CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
431  $ jaf, descaf, 11, 5, idum1, idum2, info )
432  CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 16, n, 2, nrhs, 3,
433  $ ix, jx, descx, 20, 5, idum1, idum2, info )
434  END IF
435  IF( info.NE.0 ) THEN
436  CALL pxerbla( ictxt, 'PCGERFS', -info )
437  RETURN
438  ELSE IF( lquery ) THEN
439  RETURN
440  END IF
441 *
442  jjfbe = jjxb
443  myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
444  $ npcol )
445 *
446 * Quick return if possible
447 *
448  IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
449  DO 10 jj = jjfbe, myrhs
450  ferr( jj ) = zero
451  berr( jj ) = zero
452  10 CONTINUE
453  RETURN
454  END IF
455 *
456  IF( notran ) THEN
457  transn = 'N'
458  transt = 'C'
459  ELSE
460  transn = 'C'
461  transt = 'N'
462  END IF
463 *
464  np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
465  CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
466  $ ictxt, max( 1, np0 ) )
467  ipb = 1
468  ipr = 1
469  ipv = ipr + np0
470  IF( myrow.EQ.ixbrow ) THEN
471  iiw = 1 + iroffb
472  np = np0 - iroffb
473  ELSE
474  iiw = 1
475  np = np0
476  END IF
477  iw = 1 + iroffb
478  jw = 1
479  ldxb = descb( lld_ )
480  ioffxb = ( jjxb-1 )*ldxb
481 *
482 * NZ = 1 + maximum number of nonzero entries in each row of sub( A )
483 *
484  nz = n + 1
485  eps = pslamch( ictxt, 'Epsilon' )
486  safmin = pslamch( ictxt, 'Safe minimum' )
487  safe1 = nz*safmin
488  safe2 = safe1 / eps
489  jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
490 *
491 * Handle first block separately
492 *
493  jbrhs = jn - jb + 1
494  DO 100 k = 0, jbrhs-1
495 *
496  count = 1
497  lstres = three
498  20 CONTINUE
499 *
500 * Loop until stopping criterion is satisfied.
501 *
502 * Compute residual R = sub(B) - op(sub(A)) * sub(X),
503 * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
504 * depending on TRANS.
505 *
506  CALL pccopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
507  $ descw, 1 )
508  CALL pcgemv( trans, n, n, -one, a, ia, ja, desca, x, ix,
509  $ jx+k, descx, 1, one, work( ipr ), iw, jw,
510  $ descw, 1 )
511 *
512 * Compute componentwise relative backward error from formula
513 *
514 * max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
515 *
516 * where abs(Z) is the componentwise absolute value of the
517 * matrix or vector Z. If the i-th component of the
518 * denominator is less than SAFE2, then SAFE1 is added to the
519 * i-th components of the numerator and denominator before
520 * dividing.
521 *
522  IF( mycol.EQ.ixbcol ) THEN
523  IF( np.GT.0 ) THEN
524  DO 30 ii = iixb, iixb + np - 1
525  rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
526  30 CONTINUE
527  END IF
528  END IF
529 *
530  CALL pcagemv( trans, n, n, rone, a, ia, ja, desca, x, ix, jx+k,
531  $ descx, 1, rone, rwork( ipb ), iw, jw, descw, 1 )
532 *
533  s = zero
534  IF( mycol.EQ.ixbcol ) THEN
535  IF( np.GT.0 ) THEN
536  DO 40 ii = iiw-1, iiw+np-2
537  IF( rwork( ipb+ii ).GT.safe2 ) THEN
538  s = max( s, cabs1( work( ipr+ii ) ) /
539  $ rwork( ipb+ii ) )
540  ELSE
541  s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
542  $ ( rwork( ipb+ii )+safe1 ) )
543  END IF
544  40 CONTINUE
545  END IF
546  END IF
547 *
548  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
549  $ -1, mycol )
550  IF( mycol.EQ.ixbcol )
551  $ berr( jjfbe ) = s
552 *
553 * Test stopping criterion. Continue iterating if
554 * 1) The residual BERR(J+K) is larger than machine epsilon,
555 * and
556 * 2) BERR(J+K) decreased by at least a factor of 2 during the
557 * last iteration, and
558 * 3) At most ITMAX iterations tried.
559 *
560  IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
561 *
562 * Update solution and try again.
563 *
564  CALL pcgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
565  $ work( ipr ), iw, jw, descw, info )
566  CALL pcaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
567  $ jx+k, descx, 1 )
568  lstres = s
569  count = count + 1
570  GO TO 20
571  END IF
572 *
573 * Bound error from formula
574 *
575 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
576 * norm( abs(inv(op(sub(A))))*
577 * ( abs(R) + NZ*EPS*(
578 * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
579 *
580 * where
581 * norm(Z) is the magnitude of the largest component of Z
582 * inv(op(sub(A))) is the inverse of op(sub(A))
583 * abs(Z) is the componentwise absolute value of the matrix
584 * or vector Z
585 * NZ is the maximum number of nonzeros in any row of sub(A),
586 * plus 1
587 * EPS is machine epsilon
588 *
589 * The i-th component of
590 * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
591 * is incremented by SAFE1 if the i-th component of
592 * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
593 * SAFE2.
594 *
595 * Use PCLACON to estimate the infinity-norm of the matrix
596 * inv(op(sub(A))) * diag(W), where
597 * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
598 *
599  IF( mycol.EQ.ixbcol ) THEN
600  IF( np.GT.0 ) THEN
601  DO 50 ii = iiw-1, iiw+np-2
602  IF( rwork( ipb+ii ).GT.safe2 ) THEN
603  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
604  $ nz*eps*rwork( ipb+ii )
605  ELSE
606  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
607  $ nz*eps*rwork( ipb+ii ) + safe1
608  END IF
609  50 CONTINUE
610  END IF
611  END IF
612 *
613  kase = 0
614  60 CONTINUE
615  IF( mycol.EQ.ixbcol ) THEN
616  CALL cgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
617  $ descw( lld_ ) )
618  ELSE
619  CALL cgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
620  $ descw( lld_ ), myrow, ixbcol )
621  END IF
622  descw( csrc_ ) = mycol
623  CALL pclacon( n, work( ipv ), iw, jw, descw, work( ipr ),
624  $ iw, jw, descw, est, kase )
625  descw( csrc_ ) = ixbcol
626 *
627  IF( kase.NE.0 ) THEN
628  IF( kase.EQ.1 ) THEN
629 *
630 * Multiply by diag(W)*inv(op(sub(A))').
631 *
632  CALL pcgetrs( transt, n, 1, af, iaf, jaf, descaf,
633  $ ipiv, work( ipr ), iw, jw, descw, info )
634 *
635  IF( mycol.EQ.ixbcol ) THEN
636  IF( np.GT.0 ) THEN
637  DO 70 ii = iiw-1, iiw+np-2
638  work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
639  70 CONTINUE
640  END IF
641  END IF
642  ELSE
643 *
644 * Multiply by inv(op(sub(A)))*diag(W).
645 *
646  IF( mycol.EQ.ixbcol ) THEN
647  IF( np.GT.0 ) THEN
648  DO 80 ii = iiw-1, iiw+np-2
649  work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
650  80 CONTINUE
651  END IF
652  END IF
653 *
654  CALL pcgetrs( transn, n, 1, af, iaf, jaf, descaf,
655  $ ipiv, work( ipr ), iw, jw, descw, info )
656  END IF
657  GO TO 60
658  END IF
659 *
660 * Normalize error.
661 *
662  lstres = zero
663  IF( mycol.EQ.ixbcol ) THEN
664  IF( np.GT.0 ) THEN
665  DO 90 ii = iixb, iixb+np-1
666  lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
667  90 CONTINUE
668  END IF
669  CALL sgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
670  $ idum, 1, -1, mycol )
671  IF( lstres.NE.zero )
672  $ ferr( jjfbe ) = est / lstres
673 *
674  jjxb = jjxb + 1
675  jjfbe = jjfbe + 1
676  ioffxb = ioffxb + ldxb
677 *
678  END IF
679 *
680  100 CONTINUE
681 *
682  icurcol = mod( ixbcol+1, npcol )
683 *
684 * Do for each right hand side
685 *
686  DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
687  jbrhs = min( jb+nrhs-j, descb( nb_ ) )
688  descw( csrc_ ) = icurcol
689 *
690  DO 190 k = 0, jbrhs-1
691 *
692  count = 1
693  lstres = three
694  110 CONTINUE
695 *
696 * Loop until stopping criterion is satisfied.
697 *
698 * Compute residual R = sub(B) - op(sub(A)) * sub(X),
699 * where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
700 * depending on TRANS.
701 *
702  CALL pccopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
703  $ descw, 1 )
704  CALL pcgemv( trans, n, n, -one, a, ia, ja, desca, x,
705  $ ix, j+k, descx, 1, one, work( ipr ), iw, jw,
706  $ descw, 1 )
707 *
708 * Compute componentwise relative backward error from formula
709 *
710 * max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) +
711 * abs(sub(B)))(i))
712 *
713 * where abs(Z) is the componentwise absolute value of the
714 * matrix or vector Z. If the i-th component of the
715 * denominator is less than SAFE2, then SAFE1 is added to the
716 * i-th components of the numerator and denominator before
717 * dividing.
718 *
719  IF( mycol.EQ.icurcol ) THEN
720  IF( np.GT.0 ) THEN
721  DO 120 ii = iixb, iixb+np-1
722  rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
723  120 CONTINUE
724  END IF
725  END IF
726 *
727  CALL pcagemv( trans, n, n, rone, a, ia, ja, desca, x, ix,
728  $ j+k, descx, 1, rone, rwork( ipb ), iw, jw,
729  $ descw, 1 )
730 *
731  s = zero
732  IF( mycol.EQ.icurcol ) THEN
733  IF( np.GT.0 )THEN
734  DO 130 ii = iiw-1, iiw+np-2
735  IF( rwork( ipb+ii ).GT.safe2 ) THEN
736  s = max( s, cabs1( work( ipr+ii ) ) /
737  $ rwork( ipb+ii ) )
738  ELSE
739  s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
740  $ ( rwork( ipb+ii )+safe1 ) )
741  END IF
742  130 CONTINUE
743  END IF
744  END IF
745 *
746  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
747  $ -1, mycol )
748  IF( mycol.EQ.icurcol )
749  $ berr( jjfbe ) = s
750 *
751 * Test stopping criterion. Continue iterating if
752 * 1) The residual BERR(J+K) is larger than machine epsilon,
753 * and
754 * 2) BERR(J+K) decreased by at least a factor of 2 during
755 * the last iteration, and
756 * 3) At most ITMAX iterations tried.
757 *
758  IF( s.GT.eps .AND. two*s.LE.lstres .AND.
759  $ count.LE.itmax ) THEN
760 *
761 * Update solution and try again.
762 *
763  CALL pcgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
764  $ work( ipr ), iw, jw, descw, info )
765  CALL pcaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
766  $ ix, j+k, descx, 1 )
767  lstres = s
768  count = count + 1
769  GO TO 110
770  END IF
771 *
772 * Bound error from formula
773 *
774 * norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
775 * norm( abs(inv(op(sub(A))))*
776 * ( abs(R) + NZ*EPS*(
777 * abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
778 *
779 * where
780 * norm(Z) is the magnitude of the largest component of Z
781 * inv(op(sub(A))) is the inverse of op(sub(A))
782 * abs(Z) is the componentwise absolute value of the matrix
783 * or vector Z
784 * NZ is the maximum number of nonzeros in any row of sub(A),
785 * plus 1
786 * EPS is machine epsilon
787 *
788 * The i-th component of
789 * abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
790 * is incremented by SAFE1 if the i-th component of
791 * abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
792 * SAFE2.
793 *
794 * Use PCLACON to estimate the infinity-norm of the matrix
795 * inv(op(sub(A))) * diag(W), where
796 * W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
797 *
798  IF( mycol.EQ.icurcol ) THEN
799  IF( np.GT.0 ) THEN
800  DO 140 ii = iiw-1, iiw+np-2
801  IF( rwork( ipb+ii ).GT.safe2 ) THEN
802  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
803  $ nz*eps*rwork( ipb+ii )
804  ELSE
805  rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
806  $ nz*eps*rwork( ipb+ii ) + safe1
807  END IF
808  140 CONTINUE
809  END IF
810  END IF
811 *
812  kase = 0
813  150 CONTINUE
814  IF( mycol.EQ.icurcol ) THEN
815  CALL cgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
816  $ descw( lld_ ) )
817  ELSE
818  CALL cgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
819  $ descw( lld_ ), myrow, icurcol )
820  END IF
821  descw( csrc_ ) = mycol
822  CALL pclacon( n, work( ipv ), iw, jw, descw, work( ipr ),
823  $ iw, jw, descw, est, kase )
824  descw( csrc_ ) = icurcol
825 *
826  IF( kase.NE.0 ) THEN
827  IF( kase.EQ.1 ) THEN
828 *
829 * Multiply by diag(W)*inv(op(sub(A))').
830 *
831  CALL pcgetrs( transt, n, 1, af, iaf, jaf, descaf,
832  $ ipiv, work( ipr ), iw, jw, descw, info )
833 *
834  IF( mycol.EQ.icurcol ) THEN
835  IF( np.GT.0 ) THEN
836  DO 160 ii = iiw-1, iiw+np-2
837  work( ipr+ii ) = rwork( ipb+ii )*
838  $ work( ipr+ii )
839  160 CONTINUE
840  END IF
841  END IF
842  ELSE
843 *
844 * Multiply by inv(op(sub(A)))*diag(W).
845 *
846  IF( mycol.EQ.icurcol ) THEN
847  IF( np.GT.0 ) THEN
848  DO 170 ii = iiw-1, iiw+np-2
849  work( ipr+ii ) = rwork( ipb+ii )*
850  $ work( ipr+ii )
851  170 CONTINUE
852  END IF
853  END IF
854 *
855  CALL pcgetrs( transn, n, 1, af, iaf, jaf, descaf,
856  $ ipiv, work( ipr ), iw, jw, descw,
857  $ info )
858  END IF
859  GO TO 150
860  END IF
861 *
862 * Normalize error.
863 *
864  lstres = zero
865  IF( mycol.EQ.icurcol ) THEN
866  IF( np.GT.0 ) THEN
867  DO 180 ii = iixb, iixb+np-1
868  lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
869  180 CONTINUE
870  END IF
871  CALL sgamx2d( ictxt, 'Column', ' ', 1, 1, lstres,
872  $ 1, idum, idum, 1, -1, mycol )
873  IF( lstres.NE.zero )
874  $ ferr( jjfbe ) = est / lstres
875 *
876  jjxb = jjxb + 1
877  jjfbe = jjfbe + 1
878  ioffxb = ioffxb + ldxb
879 *
880  END IF
881 *
882  190 CONTINUE
883 *
884  icurcol = mod( icurcol+1, npcol )
885 *
886  200 CONTINUE
887 *
888  work( 1 ) = cmplx( real( lwmin ) )
889  rwork( 1 ) = real( lrwmin )
890 *
891  RETURN
892 *
893 * End of PCGERFS
894 *
895  END
cmplx
float cmplx[2]
Definition: pblas.h:132
max
#define max(A, B)
Definition: pcgemr.c:180
pcgerfs
subroutine pcgerfs(TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF, JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX, JX, DESCX, FERR, BERR, WORK, LWORK, RWORK, LRWORK, INFO)
Definition: pcgerfs.f:5
pclacon
subroutine pclacon(N, V, IV, JV, DESCV, X, IX, JX, DESCX, EST, KASE)
Definition: pclacon.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pcgetrs
subroutine pcgetrs(TRANS, N, NRHS, A, IA, JA, DESCA, IPIV, B, IB, JB, DESCB, INFO)
Definition: pcgetrs.f:3
pchk2mat
subroutine pchk2mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, MB, MBPOS0, NB, NBPOS0, IB, JB, DESCB, DESCBPOS0, NEXTRA, EX, EXPOS, INFO)
Definition: pchkxmat.f:175
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
chk1mat
subroutine chk1mat(MA, MAPOS0, NA, NAPOS0, IA, JA, DESCA, DESCAPOS0, INFO)
Definition: chk1mat.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181