SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pdgerfs.f
Go to the documentation of this file.
1 SUBROUTINE pdgerfs( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF,
2 $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX,
3 $ JX, DESCX, FERR, BERR, WORK, LWORK, IWORK,
4 $ LIWORK, INFO )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* November 15, 1997
10*
11* .. Scalar Arguments ..
12 CHARACTER TRANS
13 INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX,
14 $ LIWORK, LWORK, N, NRHS
15* ..
16* .. Array Arguments ..
17 INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
18 $ DESCX( * ),IPIV( * ), IWORK( * )
19 DOUBLE PRECISION A( * ), AF( * ), B( * ), BERR( * ), FERR( * ),
20 $ work( * ), x( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PDGERFS improves the computed solution to a system of linear
27* equations and provides error bounds and backward error estimates for
28* the solutions.
29*
30* Notes
31* =====
32*
33* Each global data object is described by an associated description
34* vector. This vector stores the information required to establish
35* the mapping between an object element and its corresponding process
36* and memory location.
37*
38* Let A be a generic term for any 2D block cyclicly distributed array.
39* Such a global array has an associated description vector DESCA.
40* In the following comments, the character _ should be read as
41* "of the global array".
42*
43* NOTATION STORED IN EXPLANATION
44* --------------- -------------- --------------------------------------
45* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46* DTYPE_A = 1.
47* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48* the BLACS process grid A is distribu-
49* ted over. The context itself is glo-
50* bal, but the handle (the integer
51* value) may vary.
52* M_A (global) DESCA( M_ ) The number of rows in the global
53* array A.
54* N_A (global) DESCA( N_ ) The number of columns in the global
55* array A.
56* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57* the rows of the array.
58* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59* the columns of the array.
60* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61* row of the array A is distributed.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of the array A is
64* distributed.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array. LLD_A >= MAX(1,LOCr(M_A)).
67*
68* Let K be the number of rows or columns of a distributed matrix,
69* and assume that its process grid has dimension p x q.
70* LOCr( K ) denotes the number of elements of K that a process
71* would receive if K were distributed over the p processes of its
72* process column.
73* Similarly, LOCc( K ) denotes the number of elements of K that a
74* process would receive if K were distributed over the q processes of
75* its process row.
76* The values of LOCr() and LOCc() may be determined via a call to the
77* ScaLAPACK tool function, NUMROC:
78* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80* An upper bound for these quantities may be computed by:
81* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83*
84* In the following comments, sub( A ), sub( X ) and sub( B ) denote
85* respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86* B(IB:IB+N-1,JB:JB+NRHS-1).
87*
88* Arguments
89* =========
90*
91* TRANS (global input) CHARACTER*1
92* Specifies the form of the system of equations.
93* = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose)
94* = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose)
95* = 'C': sub( A )**T * sub( X ) = sub( B )
96* (Conjugate transpose = Transpose)
97*
98*
99* N (global input) INTEGER
100* The order of the matrix sub( A ). N >= 0.
101*
102* NRHS (global input) INTEGER
103* The number of right hand sides, i.e., the number of columns
104* of the matrices sub( B ) and sub( X ). NRHS >= 0.
105*
106* A (local input) DOUBLE PRECISION pointer into the local
107* memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
108* This array contains the local pieces of the distributed
109* matrix sub( A ).
110*
111* IA (global input) INTEGER
112* The row index in the global array A indicating the first
113* row of sub( A ).
114*
115* JA (global input) INTEGER
116* The column index in the global array A indicating the
117* first column of sub( A ).
118*
119* DESCA (global and local input) INTEGER array of dimension DLEN_.
120* The array descriptor for the distributed matrix A.
121*
122* AF (local input) DOUBLE PRECISION pointer into the local
123* memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
124* This array contains the local pieces of the distributed
125* factors of the matrix sub( A ) = P * L * U as computed by
126* PDGETRF.
127*
128* IAF (global input) INTEGER
129* The row index in the global array AF indicating the first
130* row of sub( AF ).
131*
132* JAF (global input) INTEGER
133* The column index in the global array AF indicating the
134* first column of sub( AF ).
135*
136* DESCAF (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix AF.
138*
139* IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF.
140* This array contains the pivoting information as computed
141* by PDGETRF. IPIV(i) -> The global row local row i
142* was swapped with. This array is tied to the distributed
143* matrix A.
144*
145* B (local input) DOUBLE PRECISION pointer into the local
146* memory to an array of local dimension
147* (LLD_B,LOCc(JB+NRHS-1)). This array contains the local
148* pieces of the distributed matrix of right hand sides
149* sub( B ).
150*
151* IB (global input) INTEGER
152* The row index in the global array B indicating the first
153* row of sub( B ).
154*
155* JB (global input) INTEGER
156* The column index in the global array B indicating the
157* first column of sub( B ).
158*
159* DESCB (global and local input) INTEGER array of dimension DLEN_.
160* The array descriptor for the distributed matrix B.
161*
162* X (local input and output) DOUBLE PRECISION pointer into the
163* local memory to an array of local dimension
164* (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains
165* the local pieces of the distributed matrix solution
166* sub( X ). On exit, the improved solution vectors.
167*
168* IX (global input) INTEGER
169* The row index in the global array X indicating the first
170* row of sub( X ).
171*
172* JX (global input) INTEGER
173* The column index in the global array X indicating the
174* first column of sub( X ).
175*
176* DESCX (global and local input) INTEGER array of dimension DLEN_.
177* The array descriptor for the distributed matrix X.
178*
179* FERR (local output) DOUBLE PRECISION array of local dimension
180* LOCc(JB+NRHS-1).
181* The estimated forward error bound for each solution vector
182* of sub( X ). If XTRUE is the true solution corresponding
183* to sub( X ), FERR is an estimated upper bound for the
184* magnitude of the largest element in (sub( X ) - XTRUE)
185* divided by the magnitude of the largest element in sub( X ).
186* The estimate is as reliable as the estimate for RCOND, and
187* is almost always a slight overestimate of the true error.
188* This array is tied to the distributed matrix X.
189*
190* BERR (local output) DOUBLE PRECISION array of local dimension
191* LOCc(JB+NRHS-1). The componentwise relative backward
192* error of each solution vector (i.e., the smallest re-
193* lative change in any entry of sub( A ) or sub( B )
194* that makes sub( X ) an exact solution).
195* This array is tied to the distributed matrix X.
196*
197* WORK (local workspace/local output) DOUBLE PRECISION array,
198* dimension (LWORK)
199* On exit, WORK(1) returns the minimal and optimal LWORK.
200*
201* LWORK (local or global input) INTEGER
202* The dimension of the array WORK.
203* LWORK is local input and must be at least
204* LWORK >= 3*LOCr( N + MOD(IA-1,MB_A) )
205*
206* If LWORK = -1, then LWORK is global input and a workspace
207* query is assumed; the routine only calculates the minimum
208* and optimal size for all work arrays. Each of these
209* values is returned in the first entry of the corresponding
210* work array, and no error message is issued by PXERBLA.
211*
212* IWORK (local workspace/local output) INTEGER array,
213* dimension (LIWORK)
214* On exit, IWORK(1) returns the minimal and optimal LIWORK.
215*
216* LIWORK (local or global input) INTEGER
217* The dimension of the array IWORK.
218* LIWORK is local input and must be at least
219* LIWORK >= LOCr( N + MOD(IB-1,MB_B) ).
220*
221* If LIWORK = -1, then LIWORK is global input and a workspace
222* query is assumed; the routine only calculates the minimum
223* and optimal size for all work arrays. Each of these
224* values is returned in the first entry of the corresponding
225* work array, and no error message is issued by PXERBLA.
226*
227*
228* INFO (global output) INTEGER
229* = 0: successful exit
230* < 0: If the i-th argument is an array and the j-entry had
231* an illegal value, then INFO = -(i*100+j), if the i-th
232* argument is a scalar and had an illegal value, then
233* INFO = -i.
234*
235* Internal Parameters
236* ===================
237*
238* ITMAX is the maximum number of steps of iterative refinement.
239*
240* Notes
241* =====
242*
243* This routine temporarily returns when N <= 1.
244*
245* The distributed submatrices op( A ) and op( AF ) (respectively
246* sub( X ) and sub( B ) ) should be distributed the same way on the
247* same processes. These conditions ensure that sub( A ) and sub( AF )
248* (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
249*
250* Moreover, this routine requires the distributed submatrices sub( A ),
251* sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
252* i.e., if f(x,y) = MOD( x-1, y ):
253* f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
254* f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
255* f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
256* f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
257*
258* =====================================================================
259*
260* .. Parameters ..
261 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
262 $ LLD_, MB_, M_, NB_, N_, RSRC_
263 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
264 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
265 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
266 INTEGER ITMAX
267 PARAMETER ( ITMAX = 5 )
268 double precision zero, one
269 parameter( zero = 0.0d+0, one = 1.0d+0 )
270 DOUBLE PRECISION TWO, THREE
271 parameter( two = 2.0d+0, three = 3.0d+0 )
272* ..
273* .. Local Scalars ..
274 LOGICAL LQUERY, NOTRAN
275 CHARACTER TRANST
276 INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
277 $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
278 $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
279 $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
280 $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
281 $ k, kase, ldxb, liwmin, lwmin, mycol, myrhs,
282 $ myrow, np, np0, npcol, npmod, nprow, nz
283 DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
284* ..
285* .. Local Arrays ..
286 INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
287* ..
288* .. External Functions ..
289 LOGICAL LSAME
290 INTEGER ICEIL, INDXG2P, NUMROC
291 DOUBLE PRECISION PDLAMCH
292 EXTERNAL iceil, indxg2p, lsame, numroc, pdlamch
293* ..
294* .. External Subroutines ..
295 EXTERNAL blacs_gridinfo, chk1mat, descset, dgamx2d,
296 $ dgebr2d, dgebs2d, infog2l, pchk2mat,
297 $ pdagemv, pdaxpy, pdcopy, pdgemv,
299* ..
300* .. Intrinsic Functions ..
301 INTRINSIC abs, dble, ichar, max, min, mod
302* ..
303* .. Executable Statements ..
304*
305* Get grid parameters
306*
307 ictxt = desca( ctxt_ )
308 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
309*
310* Test the input parameters.
311*
312 notran = lsame( trans, 'N' )
313*
314 info = 0
315 IF( nprow.EQ.-1 ) THEN
316 info = -(700+ctxt_)
317 ELSE
318 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
319 CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
320 CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 16, info )
321 CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 20, info )
322 IF( info.EQ.0 ) THEN
323 iroffa = mod( ia-1, desca( mb_ ) )
324 icoffa = mod( ja-1, desca( nb_ ) )
325 iroffaf = mod( iaf-1, descaf( mb_ ) )
326 icoffaf = mod( jaf-1, descaf( nb_ ) )
327 iroffb = mod( ib-1, descb( mb_ ) )
328 icoffb = mod( jb-1, descb( nb_ ) )
329 iroffx = mod( ix-1, descx( mb_ ) )
330 icoffx = mod( jx-1, descx( nb_ ) )
331 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
332 $ nprow )
333 iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
334 $ descaf( csrc_ ), npcol )
335 iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
336 $ descaf( rsrc_ ), nprow )
337 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
338 $ npcol )
339 CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
340 $ iixb, jjxb, ixbrow, ixbcol )
341 ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
342 $ nprow )
343 ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
344 $ npcol )
345 npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
346 $ nprow )
347 lwmin = 3 * npmod
348 liwmin = npmod
349 work( 1 ) = dble( lwmin )
350 iwork( 1 ) = liwmin
351 lquery = ( lwork.EQ.-1 .OR. liwork.EQ.-1 )
352*
353 IF( ( .NOT.notran ) .AND. ( .NOT.lsame( trans, 'T' ) ) .AND.
354 $ ( .NOT.lsame( trans, 'C' ) ) ) THEN
355 info = -1
356 ELSE IF( n.LT.0 ) THEN
357 info = -2
358 ELSE IF( nrhs.LT.0 ) THEN
359 info = -3
360 ELSE IF( iroffa.NE.0 ) THEN
361 info = -5
362 ELSE IF( icoffa.NE.0 ) THEN
363 info = -6
364 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
365 info = -( 700 + nb_ )
366 ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
367 info = -( 1100 + mb_ )
368 ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
369 info = -9
370 ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
371 info = -( 1100 + nb_ )
372 ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
373 info = -10
374 ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
375 info = -( 1100 + ctxt_ )
376 ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
377 info = -14
378 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
379 info = -( 1600 + mb_ )
380 ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
381 info = -( 1600 + ctxt_ )
382 ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
383 info = -( 2000 + mb_ )
384 ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
385 info = -18
386 ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
387 info = -( 2000 + nb_ )
388 ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
389 info = -19
390 ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
391 info = -( 2000 + ctxt_ )
392 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
393 info = -24
394 ELSE IF( liwork.LT.liwmin .AND. .NOT.lquery ) THEN
395 info = -26
396 END IF
397 END IF
398*
399 IF( notran ) THEN
400 idum1( 1 ) = ichar( 'N' )
401 ELSE IF( lsame( trans, 'T' ) ) THEN
402 idum1( 1 ) = ichar( 'T' )
403 ELSE
404 idum1( 1 ) = ichar( 'C' )
405 END IF
406 idum2( 1 ) = 1
407 idum1( 2 ) = n
408 idum2( 2 ) = 2
409 idum1( 3 ) = nrhs
410 idum2( 3 ) = 3
411 IF( lwork.EQ.-1 ) THEN
412 idum1( 4 ) = -1
413 ELSE
414 idum1( 4 ) = 1
415 END IF
416 idum2( 4 ) = 24
417 IF( liwork.EQ.-1 ) THEN
418 idum1( 5 ) = -1
419 ELSE
420 idum1( 5 ) = 1
421 END IF
422 idum2( 5 ) = 26
423 CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
424 $ jaf, descaf, 11, 5, idum1, idum2, info )
425 CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 16, n, 2, nrhs, 3,
426 $ ix, jx, descx, 20, 5, idum1, idum2, info )
427 END IF
428 IF( info.NE.0 ) THEN
429 CALL pxerbla( ictxt, 'PDGERFS', -info )
430 RETURN
431 ELSE IF( lquery ) THEN
432 RETURN
433 END IF
434*
435 jjfbe = jjxb
436 myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
437 $ npcol )
438*
439* Quick return if possible
440*
441 IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
442 DO 10 jj = jjfbe, myrhs
443 ferr( jj ) = zero
444 berr( jj ) = zero
445 10 CONTINUE
446 RETURN
447 END IF
448*
449 IF( notran ) THEN
450 transt = 'T'
451 ELSE
452 transt = 'N'
453 END IF
454*
455 np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
456 CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
457 $ ictxt, max( 1, np0 ) )
458 ipb = 1
459 ipr = ipb + np0
460 ipv = ipr + np0
461 IF( myrow.EQ.ixbrow ) THEN
462 iiw = 1 + iroffb
463 np = np0 - iroffb
464 ELSE
465 iiw = 1
466 np = np0
467 END IF
468 iw = 1 + iroffb
469 jw = 1
470 ldxb = descb( lld_ )
471 ioffxb = ( jjxb-1 )*ldxb
472*
473* NZ = 1 + maximum number of nonzero entries in each row of sub( A )
474*
475 nz = n + 1
476 eps = pdlamch( ictxt, 'Epsilon' )
477 safmin = pdlamch( ictxt, 'Safe minimum' )
478 safe1 = nz*safmin
479 safe2 = safe1 / eps
480 jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
481*
482* Handle first block separately
483*
484 jbrhs = jn - jb + 1
485 DO 100 k = 0, jbrhs-1
486*
487 count = 1
488 lstres = three
489 20 CONTINUE
490*
491* Loop until stopping criterion is satisfied.
492*
493* Compute residual R = sub(B) - op(sub(A)) * sub(X),
494* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
495* depending on TRANS.
496*
497 CALL pdcopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
498 $ descw, 1 )
499 CALL pdgemv( trans, n, n, -one, a, ia, ja, desca, x, ix,
500 $ jx+k, descx, 1, one, work( ipr ), iw, jw,
501 $ descw, 1 )
502*
503* Compute componentwise relative backward error from formula
504*
505* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
506*
507* where abs(Z) is the componentwise absolute value of the
508* matrix or vector Z. If the i-th component of the
509* denominator is less than SAFE2, then SAFE1 is added to the
510* i-th components of the numerator and denominator before
511* dividing.
512*
513 IF( mycol.EQ.ixbcol ) THEN
514 IF( np.GT.0 ) THEN
515 DO 30 ii = iixb, iixb + np - 1
516 work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
517 30 CONTINUE
518 END IF
519 END IF
520*
521 CALL pdagemv( trans, n, n, one, a, ia, ja, desca, x, ix, jx+k,
522 $ descx, 1, one, work( ipb ), iw, jw, descw, 1 )
523*
524 s = zero
525 IF( mycol.EQ.ixbcol ) THEN
526 IF( np.GT.0 ) THEN
527 DO 40 ii = iiw-1, iiw+np-2
528 IF( work( ipb+ii ).GT.safe2 ) THEN
529 s = max( s, abs( work( ipr+ii ) ) /
530 $ work( ipb+ii ) )
531 ELSE
532 s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
533 $ ( work( ipb+ii )+safe1 ) )
534 END IF
535 40 CONTINUE
536 END IF
537 END IF
538*
539 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
540 $ -1, mycol )
541 IF( mycol.EQ.ixbcol )
542 $ berr( jjfbe ) = s
543*
544* Test stopping criterion. Continue iterating if
545* 1) The residual BERR(J+K) is larger than machine epsilon,
546* and
547* 2) BERR(J+K) decreased by at least a factor of 2 during the
548* last iteration, and
549* 3) At most ITMAX iterations tried.
550*
551 IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
552*
553* Update solution and try again.
554*
555 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
556 $ work( ipr ), iw, jw, descw, info )
557 CALL pdaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
558 $ jx+k, descx, 1 )
559 lstres = s
560 count = count + 1
561 GO TO 20
562 END IF
563*
564* Bound error from formula
565*
566* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
567* norm( abs(inv(op(sub(A))))*
568* ( abs(R) + NZ*EPS*(
569* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
570*
571* where
572* norm(Z) is the magnitude of the largest component of Z
573* inv(op(sub(A))) is the inverse of op(sub(A))
574* abs(Z) is the componentwise absolute value of the matrix
575* or vector Z
576* NZ is the maximum number of nonzeros in any row of sub(A),
577* plus 1
578* EPS is machine epsilon
579*
580* The i-th component of
581* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
582* is incremented by SAFE1 if the i-th component of
583* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
584* SAFE2.
585*
586* Use PDLACON to estimate the infinity-norm of the matrix
587* inv(op(sub(A))) * diag(W), where
588* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
589*
590 IF( mycol.EQ.ixbcol ) THEN
591 IF( np.GT.0 ) THEN
592 DO 50 ii = iiw-1, iiw+np-2
593 IF( work( ipb+ii ).GT.safe2 ) THEN
594 work( ipb+ii ) = abs( work( ipr+ii ) ) +
595 $ nz*eps*work( ipb+ii )
596 ELSE
597 work( ipb+ii ) = abs( work( ipr+ii ) ) +
598 $ nz*eps*work( ipb+ii ) + safe1
599 END IF
600 50 CONTINUE
601 END IF
602 END IF
603*
604 kase = 0
605 60 CONTINUE
606 IF( mycol.EQ.ixbcol ) THEN
607 CALL dgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
608 $ descw( lld_ ) )
609 ELSE
610 CALL dgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
611 $ descw( lld_ ), myrow, ixbcol )
612 END IF
613 descw( csrc_ ) = mycol
614 CALL pdlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
615 $ iw, jw, descw, iwork, est, kase )
616 descw( csrc_ ) = ixbcol
617*
618 IF( kase.NE.0 ) THEN
619 IF( kase.EQ.1 ) THEN
620*
621* Multiply by diag(W)*inv(op(sub(A))').
622*
623 CALL pdgetrs( transt, n, 1, af, iaf, jaf, descaf,
624 $ ipiv, work( ipr ), iw, jw, descw, info )
625*
626 IF( mycol.EQ.ixbcol ) THEN
627 IF( np.GT.0 ) THEN
628 DO 70 ii = iiw-1, iiw+np-2
629 work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
630 70 CONTINUE
631 END IF
632 END IF
633 ELSE
634*
635* Multiply by inv(op(sub(A)))*diag(W).
636*
637 IF( mycol.EQ.ixbcol ) THEN
638 IF( np.GT.0 ) THEN
639 DO 80 ii = iiw-1, iiw+np-2
640 work( ipr+ii ) = work( ipb+ii )*work( ipr+ii )
641 80 CONTINUE
642 END IF
643 END IF
644*
645 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
646 $ work( ipr ), iw, jw, descw, info )
647 END IF
648 GO TO 60
649 END IF
650*
651* Normalize error.
652*
653 lstres = zero
654 IF( mycol.EQ.ixbcol ) THEN
655 IF( np.GT.0 ) THEN
656 DO 90 ii = iixb, iixb+np-1
657 lstres = max( lstres, abs( x( ioffxb+ii ) ) )
658 90 CONTINUE
659 END IF
660 CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
661 $ idum, 1, -1, mycol )
662 IF( lstres.NE.zero )
663 $ ferr( jjfbe ) = est / lstres
664*
665 jjxb = jjxb + 1
666 jjfbe = jjfbe + 1
667 ioffxb = ioffxb + ldxb
668*
669 END IF
670*
671 100 CONTINUE
672*
673 icurcol = mod( ixbcol+1, npcol )
674*
675* Do for each right hand side
676*
677 DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
678 jbrhs = min( jb+nrhs-j, descb( nb_ ) )
679 descw( csrc_ ) = icurcol
680*
681 DO 190 k = 0, jbrhs-1
682*
683 count = 1
684 lstres = three
685 110 CONTINUE
686*
687* Loop until stopping criterion is satisfied.
688*
689* Compute residual R = sub(B) - op(sub(A)) * sub(X),
690* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
691* depending on TRANS.
692*
693 CALL pdcopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
694 $ descw, 1 )
695 CALL pdgemv( trans, n, n, -one, a, ia, ja, desca, x,
696 $ ix, j+k, descx, 1, one, work( ipr ), iw, jw,
697 $ descw, 1 )
698*
699* Compute componentwise relative backward error from formula
700*
701* max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) +
702* abs(sub(B)))(i))
703*
704* where abs(Z) is the componentwise absolute value of the
705* matrix or vector Z. If the i-th component of the
706* denominator is less than SAFE2, then SAFE1 is added to the
707* i-th components of the numerator and denominator before
708* dividing.
709*
710 IF( mycol.EQ.icurcol ) THEN
711 IF( np.GT.0 ) THEN
712 DO 120 ii = iixb, iixb+np-1
713 work( iiw+ii-iixb ) = abs( b( ii+ioffxb ) )
714 120 CONTINUE
715 END IF
716 END IF
717*
718 CALL pdagemv( trans, n, n, one, a, ia, ja, desca, x, ix,
719 $ j+k, descx, 1, one, work( ipb ), iw, jw,
720 $ descw, 1 )
721*
722 s = zero
723 IF( mycol.EQ.icurcol ) THEN
724 IF( np.GT.0 )THEN
725 DO 130 ii = iiw-1, iiw+np-2
726 IF( work( ipb+ii ).GT.safe2 ) THEN
727 s = max( s, abs( work( ipr+ii ) ) /
728 $ work( ipb+ii ) )
729 ELSE
730 s = max( s, ( abs( work( ipr+ii ) )+safe1 ) /
731 $ ( work( ipb+ii )+safe1 ) )
732 END IF
733 130 CONTINUE
734 END IF
735 END IF
736*
737 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
738 $ -1, mycol )
739 IF( mycol.EQ.icurcol )
740 $ berr( jjfbe ) = s
741*
742* Test stopping criterion. Continue iterating if
743* 1) The residual BERR(J+K) is larger than machine epsilon,
744* and
745* 2) BERR(J+K) decreased by at least a factor of 2 during
746* the last iteration, and
747* 3) At most ITMAX iterations tried.
748*
749 IF( s.GT.eps .AND. two*s.LE.lstres .AND.
750 $ count.LE.itmax ) THEN
751*
752* Update solution and try again.
753*
754 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
755 $ work( ipr ), iw, jw, descw, info )
756 CALL pdaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
757 $ ix, j+k, descx, 1 )
758 lstres = s
759 count = count + 1
760 GO TO 110
761 END IF
762*
763* Bound error from formula
764*
765* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
766* norm( abs(inv(op(sub(A))))*
767* ( abs(R) + NZ*EPS*(
768* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
769*
770* where
771* norm(Z) is the magnitude of the largest component of Z
772* inv(op(sub(A))) is the inverse of op(sub(A))
773* abs(Z) is the componentwise absolute value of the matrix
774* or vector Z
775* NZ is the maximum number of nonzeros in any row of sub(A),
776* plus 1
777* EPS is machine epsilon
778*
779* The i-th component of
780* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
781* is incremented by SAFE1 if the i-th component of
782* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
783* SAFE2.
784*
785* Use PDLACON to estimate the infinity-norm of the matrix
786* inv(op(sub(A))) * diag(W), where
787* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
788*
789 IF( mycol.EQ.icurcol ) THEN
790 IF( np.GT.0 ) THEN
791 DO 140 ii = iiw-1, iiw+np-2
792 IF( work( ipb+ii ).GT.safe2 ) THEN
793 work( ipb+ii ) = abs( work( ipr+ii ) ) +
794 $ nz*eps*work( ipb+ii )
795 ELSE
796 work( ipb+ii ) = abs( work( ipr+ii ) ) +
797 $ nz*eps*work( ipb+ii ) + safe1
798 END IF
799 140 CONTINUE
800 END IF
801 END IF
802*
803 kase = 0
804 150 CONTINUE
805 IF( mycol.EQ.icurcol ) THEN
806 CALL dgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
807 $ descw( lld_ ) )
808 ELSE
809 CALL dgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
810 $ descw( lld_ ), myrow, icurcol )
811 END IF
812 descw( csrc_ ) = mycol
813 CALL pdlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
814 $ iw, jw, descw, iwork, est, kase )
815 descw( csrc_ ) = icurcol
816*
817 IF( kase.NE.0 ) THEN
818 IF( kase.EQ.1 ) THEN
819*
820* Multiply by diag(W)*inv(op(sub(A))').
821*
822 CALL pdgetrs( transt, n, 1, af, iaf, jaf, descaf,
823 $ ipiv, work( ipr ), iw, jw, descw, info )
824*
825 IF( mycol.EQ.icurcol ) THEN
826 IF( np.GT.0 ) THEN
827 DO 160 ii = iiw-1, iiw+np-2
828 work( ipr+ii ) = work( ipb+ii )*
829 $ work( ipr+ii )
830 160 CONTINUE
831 END IF
832 END IF
833 ELSE
834*
835* Multiply by inv(op(sub(A)))*diag(W).
836*
837 IF( mycol.EQ.icurcol ) THEN
838 IF( np.GT.0 ) THEN
839 DO 170 ii = iiw-1, iiw+np-2
840 work( ipr+ii ) = work( ipb+ii )*
841 $ work( ipr+ii )
842 170 CONTINUE
843 END IF
844 END IF
845*
846 CALL pdgetrs( trans, n, 1, af, iaf, jaf, descaf,
847 $ ipiv, work( ipr ), iw, jw, descw,
848 $ info )
849 END IF
850 GO TO 150
851 END IF
852*
853* Normalize error.
854*
855 lstres = zero
856 IF( mycol.EQ.icurcol ) THEN
857 IF( np.GT.0 ) THEN
858 DO 180 ii = iixb, iixb+np-1
859 lstres = max( lstres, abs( x( ioffxb+ii ) ) )
860 180 CONTINUE
861 END IF
862 CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres,
863 $ 1, idum, idum, 1, -1, mycol )
864 IF( lstres.NE.zero )
865 $ ferr( jjfbe ) = est / lstres
866*
867 jjxb = jjxb + 1
868 jjfbe = jjfbe + 1
869 ioffxb = ioffxb + ldxb
870*
871 END IF
872*
873 190 CONTINUE
874*
875 icurcol = mod( icurcol+1, npcol )
876*
877 200 CONTINUE
878*
879 work( 1 ) = dble( lwmin )
880 iwork( 1 ) = liwmin
881*
882 RETURN
883*
884* End of PDGERFS
885*
886 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pdgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, iwork, liwork, info)
Definition pdgerfs.f:5
subroutine pdgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pdgetrs.f:3
subroutine pdlacon(n, v, iv, jv, descv, x, ix, jx, descx, isgn, est, kase)
Definition pdlacon.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2