SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pcgerfs.f
Go to the documentation of this file.
1 SUBROUTINE pcgerfs( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF,
2 $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX,
3 $ JX, DESCX, FERR, BERR, WORK, LWORK, RWORK,
4 $ LRWORK, INFO )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* November 15, 1997
10*
11* .. Scalar Arguments ..
12 CHARACTER TRANS
13 INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX,
14 $ LRWORK, LWORK, N, NRHS
15* ..
16* .. Array Arguments ..
17 INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
18 $ DESCX( * ), IPIV( * )
19 REAL BERR( * ), FERR( * ), RWORK( * )
20 COMPLEX A( * ), AF( * ), B( * ), WORK( * ), X( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PCGERFS improves the computed solution to a system of linear
27* equations and provides error bounds and backward error estimates for
28* the solutions.
29*
30* Notes
31* =====
32*
33* Each global data object is described by an associated description
34* vector. This vector stores the information required to establish
35* the mapping between an object element and its corresponding process
36* and memory location.
37*
38* Let A be a generic term for any 2D block cyclicly distributed array.
39* Such a global array has an associated description vector DESCA.
40* In the following comments, the character _ should be read as
41* "of the global array".
42*
43* NOTATION STORED IN EXPLANATION
44* --------------- -------------- --------------------------------------
45* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46* DTYPE_A = 1.
47* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48* the BLACS process grid A is distribu-
49* ted over. The context itself is glo-
50* bal, but the handle (the integer
51* value) may vary.
52* M_A (global) DESCA( M_ ) The number of rows in the global
53* array A.
54* N_A (global) DESCA( N_ ) The number of columns in the global
55* array A.
56* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57* the rows of the array.
58* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59* the columns of the array.
60* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61* row of the array A is distributed.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of the array A is
64* distributed.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array. LLD_A >= MAX(1,LOCr(M_A)).
67*
68* Let K be the number of rows or columns of a distributed matrix,
69* and assume that its process grid has dimension p x q.
70* LOCr( K ) denotes the number of elements of K that a process
71* would receive if K were distributed over the p processes of its
72* process column.
73* Similarly, LOCc( K ) denotes the number of elements of K that a
74* process would receive if K were distributed over the q processes of
75* its process row.
76* The values of LOCr() and LOCc() may be determined via a call to the
77* ScaLAPACK tool function, NUMROC:
78* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80* An upper bound for these quantities may be computed by:
81* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83*
84* In the following comments, sub( A ), sub( X ) and sub( B ) denote
85* respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86* B(IB:IB+N-1,JB:JB+NRHS-1).
87*
88* Arguments
89* =========
90*
91* TRANS (global input) CHARACTER*1
92* Specifies the form of the system of equations.
93* = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose)
94* = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose)
95* = 'C': sub( A )**H * sub( X ) = sub( B )
96* (Conjugate transpose)
97*
98* N (global input) INTEGER
99* The order of the matrix sub( A ). N >= 0.
100*
101* NRHS (global input) INTEGER
102* The number of right hand sides, i.e., the number of columns
103* of the matrices sub( B ) and sub( X ). NRHS >= 0.
104*
105* A (local input) COMPLEX pointer into the local
106* memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
107* This array contains the local pieces of the distributed
108* matrix sub( A ).
109*
110* IA (global input) INTEGER
111* The row index in the global array A indicating the first
112* row of sub( A ).
113*
114* JA (global input) INTEGER
115* The column index in the global array A indicating the
116* first column of sub( A ).
117*
118* DESCA (global and local input) INTEGER array of dimension DLEN_.
119* The array descriptor for the distributed matrix A.
120*
121* AF (local input) COMPLEX pointer into the local
122* memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
123* This array contains the local pieces of the distributed
124* factors of the matrix sub( A ) = P * L * U as computed by
125* PCGETRF.
126*
127* IAF (global input) INTEGER
128* The row index in the global array AF indicating the first
129* row of sub( AF ).
130*
131* JAF (global input) INTEGER
132* The column index in the global array AF indicating the
133* first column of sub( AF ).
134*
135* DESCAF (global and local input) INTEGER array of dimension DLEN_.
136* The array descriptor for the distributed matrix AF.
137*
138* IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF.
139* This array contains the pivoting information as computed
140* by PCGETRF. IPIV(i) -> The global row local row i
141* was swapped with. This array is tied to the distributed
142* matrix A.
143*
144* B (local input) COMPLEX pointer into the local
145* memory to an array of local dimension
146* (LLD_B,LOCc(JB+NRHS-1)). This array contains the local
147* pieces of the distributed matrix of right hand sides
148* sub( B ).
149*
150* IB (global input) INTEGER
151* The row index in the global array B indicating the first
152* row of sub( B ).
153*
154* JB (global input) INTEGER
155* The column index in the global array B indicating the
156* first column of sub( B ).
157*
158* DESCB (global and local input) INTEGER array of dimension DLEN_.
159* The array descriptor for the distributed matrix B.
160*
161* X (local input and output) COMPLEX pointer into the
162* local memory to an array of local dimension
163* (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains
164* the local pieces of the distributed matrix solution
165* sub( X ). On exit, the improved solution vectors.
166*
167* IX (global input) INTEGER
168* The row index in the global array X indicating the first
169* row of sub( X ).
170*
171* JX (global input) INTEGER
172* The column index in the global array X indicating the
173* first column of sub( X ).
174*
175* DESCX (global and local input) INTEGER array of dimension DLEN_.
176* The array descriptor for the distributed matrix X.
177*
178* FERR (local output) REAL array of local dimension
179* LOCc(JB+NRHS-1).
180* The estimated forward error bound for each solution vector
181* of sub( X ). If XTRUE is the true solution corresponding
182* to sub( X ), FERR is an estimated upper bound for the
183* magnitude of the largest element in (sub( X ) - XTRUE)
184* divided by the magnitude of the largest element in sub( X ).
185* The estimate is as reliable as the estimate for RCOND, and
186* is almost always a slight overestimate of the true error.
187* This array is tied to the distributed matrix X.
188*
189* BERR (local output) REAL array of local dimension
190* LOCc(JB+NRHS-1). The componentwise relative backward
191* error of each solution vector (i.e., the smallest re-
192* lative change in any entry of sub( A ) or sub( B )
193* that makes sub( X ) an exact solution).
194* This array is tied to the distributed matrix X.
195*
196* WORK (local workspace/local output) COMPLEX array,
197* dimension (LWORK)
198* On exit, WORK(1) returns the minimal and optimal LWORK.
199*
200* LWORK (local or global input) INTEGER
201* The dimension of the array WORK.
202* LWORK is local input and must be at least
203* LWORK >= 2*LOCr( N + MOD(IA-1,MB_A) )
204*
205* If LWORK = -1, then LWORK is global input and a workspace
206* query is assumed; the routine only calculates the minimum
207* and optimal size for all work arrays. Each of these
208* values is returned in the first entry of the corresponding
209* work array, and no error message is issued by PXERBLA.
210*
211* RWORK (local workspace/local output) REAL array,
212* dimension (LRWORK)
213* On exit, RWORK(1) returns the minimal and optimal LRWORK.
214*
215* LRWORK (local or global input) INTEGER
216* The dimension of the array RWORK.
217* LRWORK is local input and must be at least
218* LRWORK >= LOCr( N + MOD(IB-1,MB_B) ).
219*
220* If LRWORK = -1, then LRWORK is global input and a workspace
221* query is assumed; the routine only calculates the minimum
222* and optimal size for all work arrays. Each of these
223* values is returned in the first entry of the corresponding
224* work array, and no error message is issued by PXERBLA.
225*
226*
227* INFO (global output) INTEGER
228* = 0: successful exit
229* < 0: If the i-th argument is an array and the j-entry had
230* an illegal value, then INFO = -(i*100+j), if the i-th
231* argument is a scalar and had an illegal value, then
232* INFO = -i.
233*
234* Internal Parameters
235* ===================
236*
237* ITMAX is the maximum number of steps of iterative refinement.
238*
239* Notes
240* =====
241*
242* This routine temporarily returns when N <= 1.
243*
244* The distributed submatrices op( A ) and op( AF ) (respectively
245* sub( X ) and sub( B ) ) should be distributed the same way on the
246* same processes. These conditions ensure that sub( A ) and sub( AF )
247* (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
248*
249* Moreover, this routine requires the distributed submatrices sub( A ),
250* sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
251* i.e., if f(x,y) = MOD( x-1, y ):
252* f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
253* f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
254* f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
255* f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
256*
257* =====================================================================
258*
259* .. Parameters ..
260 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
261 $ LLD_, MB_, M_, NB_, N_, RSRC_
262 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
263 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
264 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
265 INTEGER ITMAX
266 PARAMETER ( ITMAX = 5 )
267 real zero, rone, two, three
268 parameter( zero = 0.0e+0, rone = 1.0e+0, two = 2.0e+0,
269 $ three = 3.0e+0 )
270 COMPLEX ONE
271 parameter( one = ( 1.0e+0, 0.0e+0 ) )
272* ..
273* .. Local Scalars ..
274 LOGICAL LQUERY, NOTRAN
275 CHARACTER TRANSN, TRANST
276 INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
277 $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
278 $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
279 $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
280 $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
281 $ k, kase, ldxb, lrwmin, lwmin, mycol, myrhs,
282 $ myrow, np, np0, npcol, npmod, nprow, nz
283 REAL EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
284 COMPLEX ZDUM
285* ..
286* .. Local Arrays ..
287 INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
288* ..
289* .. External Functions ..
290 LOGICAL LSAME
291 INTEGER ICEIL, INDXG2P, NUMROC
292 REAL PSLAMCH
293 EXTERNAL iceil, indxg2p, lsame, numroc, pslamch
294* ..
295* .. External Subroutines ..
296 EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, chk1mat,
297 $ descset, infog2l, pcagemv, pcaxpy, pchk2mat,
298 $ pccopy, pcgemv, pcgetrs, pclacon,
299 $ pxerbla, sgamx2d
300* ..
301* .. Intrinsic Functions ..
302 INTRINSIC abs, aimag, cmplx, ichar, max, min, mod, real
303* ..
304* .. Statement Functions ..
305 REAL CABS1
306* ..
307* .. Statement Function definitions ..
308 cabs1( zdum ) = abs( real( zdum ) ) + abs( aimag( zdum ) )
309* ..
310* .. Executable Statements ..
311*
312* Get grid parameters
313*
314 ictxt = desca( ctxt_ )
315 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
316*
317* Test the input parameters.
318*
319 notran = lsame( trans, 'N' )
320*
321 info = 0
322 IF( nprow.EQ.-1 ) THEN
323 info = -(700+ctxt_)
324 ELSE
325 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
326 CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
327 CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 16, info )
328 CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 20, info )
329 IF( info.EQ.0 ) THEN
330 iroffa = mod( ia-1, desca( mb_ ) )
331 icoffa = mod( ja-1, desca( nb_ ) )
332 iroffaf = mod( iaf-1, descaf( mb_ ) )
333 icoffaf = mod( jaf-1, descaf( nb_ ) )
334 iroffb = mod( ib-1, descb( mb_ ) )
335 icoffb = mod( jb-1, descb( nb_ ) )
336 iroffx = mod( ix-1, descx( mb_ ) )
337 icoffx = mod( jx-1, descx( nb_ ) )
338 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
339 $ nprow )
340 iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
341 $ descaf( csrc_ ), npcol )
342 iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
343 $ descaf( rsrc_ ), nprow )
344 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
345 $ npcol )
346 CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
347 $ iixb, jjxb, ixbrow, ixbcol )
348 ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
349 $ nprow )
350 ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
351 $ npcol )
352 npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
353 $ nprow )
354 lwmin = 2 * npmod
355 lrwmin = npmod
356 work( 1 ) = cmplx( real( lwmin ) )
357 rwork( 1 ) = real( lrwmin )
358 lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 )
359*
360 IF( ( .NOT.notran ) .AND. ( .NOT.lsame( trans, 'T' ) ) .AND.
361 $ ( .NOT.lsame( trans, 'C' ) ) ) THEN
362 info = -1
363 ELSE IF( n.LT.0 ) THEN
364 info = -2
365 ELSE IF( nrhs.LT.0 ) THEN
366 info = -3
367 ELSE IF( iroffa.NE.0 ) THEN
368 info = -5
369 ELSE IF( icoffa.NE.0 ) THEN
370 info = -6
371 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
372 info = -( 700 + nb_ )
373 ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
374 info = -( 1100 + mb_ )
375 ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
376 info = -9
377 ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
378 info = -( 1100 + nb_ )
379 ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
380 info = -10
381 ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
382 info = -( 1100 + ctxt_ )
383 ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
384 info = -14
385 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
386 info = -( 1600 + mb_ )
387 ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
388 info = -( 1600 + ctxt_ )
389 ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
390 info = -( 2000 + mb_ )
391 ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
392 info = -18
393 ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
394 info = -( 2000 + nb_ )
395 ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
396 info = -19
397 ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
398 info = -( 2000 + ctxt_ )
399 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
400 info = -24
401 ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
402 info = -26
403 END IF
404 END IF
405*
406 IF( notran ) THEN
407 idum1( 1 ) = ichar( 'N' )
408 ELSE IF( lsame( trans, 'T' ) ) THEN
409 idum1( 1 ) = ichar( 'T' )
410 ELSE
411 idum1( 1 ) = ichar( 'C' )
412 END IF
413 idum2( 1 ) = 1
414 idum1( 2 ) = n
415 idum2( 2 ) = 2
416 idum1( 3 ) = nrhs
417 idum2( 3 ) = 3
418 IF( lwork.EQ.-1 ) THEN
419 idum1( 4 ) = -1
420 ELSE
421 idum1( 4 ) = 1
422 END IF
423 idum2( 4 ) = 24
424 IF( lrwork.EQ.-1 ) THEN
425 idum1( 5 ) = -1
426 ELSE
427 idum1( 5 ) = 1
428 END IF
429 idum2( 5 ) = 26
430 CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
431 $ jaf, descaf, 11, 5, idum1, idum2, info )
432 CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 16, n, 2, nrhs, 3,
433 $ ix, jx, descx, 20, 5, idum1, idum2, info )
434 END IF
435 IF( info.NE.0 ) THEN
436 CALL pxerbla( ictxt, 'PCGERFS', -info )
437 RETURN
438 ELSE IF( lquery ) THEN
439 RETURN
440 END IF
441*
442 jjfbe = jjxb
443 myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
444 $ npcol )
445*
446* Quick return if possible
447*
448 IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
449 DO 10 jj = jjfbe, myrhs
450 ferr( jj ) = zero
451 berr( jj ) = zero
452 10 CONTINUE
453 RETURN
454 END IF
455*
456 IF( notran ) THEN
457 transn = 'N'
458 transt = 'C'
459 ELSE
460 transn = 'C'
461 transt = 'N'
462 END IF
463*
464 np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
465 CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
466 $ ictxt, max( 1, np0 ) )
467 ipb = 1
468 ipr = 1
469 ipv = ipr + np0
470 IF( myrow.EQ.ixbrow ) THEN
471 iiw = 1 + iroffb
472 np = np0 - iroffb
473 ELSE
474 iiw = 1
475 np = np0
476 END IF
477 iw = 1 + iroffb
478 jw = 1
479 ldxb = descb( lld_ )
480 ioffxb = ( jjxb-1 )*ldxb
481*
482* NZ = 1 + maximum number of nonzero entries in each row of sub( A )
483*
484 nz = n + 1
485 eps = pslamch( ictxt, 'Epsilon' )
486 safmin = pslamch( ictxt, 'Safe minimum' )
487 safe1 = nz*safmin
488 safe2 = safe1 / eps
489 jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
490*
491* Handle first block separately
492*
493 jbrhs = jn - jb + 1
494 DO 100 k = 0, jbrhs-1
495*
496 count = 1
497 lstres = three
498 20 CONTINUE
499*
500* Loop until stopping criterion is satisfied.
501*
502* Compute residual R = sub(B) - op(sub(A)) * sub(X),
503* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
504* depending on TRANS.
505*
506 CALL pccopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
507 $ descw, 1 )
508 CALL pcgemv( trans, n, n, -one, a, ia, ja, desca, x, ix,
509 $ jx+k, descx, 1, one, work( ipr ), iw, jw,
510 $ descw, 1 )
511*
512* Compute componentwise relative backward error from formula
513*
514* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
515*
516* where abs(Z) is the componentwise absolute value of the
517* matrix or vector Z. If the i-th component of the
518* denominator is less than SAFE2, then SAFE1 is added to the
519* i-th components of the numerator and denominator before
520* dividing.
521*
522 IF( mycol.EQ.ixbcol ) THEN
523 IF( np.GT.0 ) THEN
524 DO 30 ii = iixb, iixb + np - 1
525 rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
526 30 CONTINUE
527 END IF
528 END IF
529*
530 CALL pcagemv( trans, n, n, rone, a, ia, ja, desca, x, ix, jx+k,
531 $ descx, 1, rone, rwork( ipb ), iw, jw, descw, 1 )
532*
533 s = zero
534 IF( mycol.EQ.ixbcol ) THEN
535 IF( np.GT.0 ) THEN
536 DO 40 ii = iiw-1, iiw+np-2
537 IF( rwork( ipb+ii ).GT.safe2 ) THEN
538 s = max( s, cabs1( work( ipr+ii ) ) /
539 $ rwork( ipb+ii ) )
540 ELSE
541 s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
542 $ ( rwork( ipb+ii )+safe1 ) )
543 END IF
544 40 CONTINUE
545 END IF
546 END IF
547*
548 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
549 $ -1, mycol )
550 IF( mycol.EQ.ixbcol )
551 $ berr( jjfbe ) = s
552*
553* Test stopping criterion. Continue iterating if
554* 1) The residual BERR(J+K) is larger than machine epsilon,
555* and
556* 2) BERR(J+K) decreased by at least a factor of 2 during the
557* last iteration, and
558* 3) At most ITMAX iterations tried.
559*
560 IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
561*
562* Update solution and try again.
563*
564 CALL pcgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
565 $ work( ipr ), iw, jw, descw, info )
566 CALL pcaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
567 $ jx+k, descx, 1 )
568 lstres = s
569 count = count + 1
570 GO TO 20
571 END IF
572*
573* Bound error from formula
574*
575* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
576* norm( abs(inv(op(sub(A))))*
577* ( abs(R) + NZ*EPS*(
578* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
579*
580* where
581* norm(Z) is the magnitude of the largest component of Z
582* inv(op(sub(A))) is the inverse of op(sub(A))
583* abs(Z) is the componentwise absolute value of the matrix
584* or vector Z
585* NZ is the maximum number of nonzeros in any row of sub(A),
586* plus 1
587* EPS is machine epsilon
588*
589* The i-th component of
590* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
591* is incremented by SAFE1 if the i-th component of
592* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
593* SAFE2.
594*
595* Use PCLACON to estimate the infinity-norm of the matrix
596* inv(op(sub(A))) * diag(W), where
597* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
598*
599 IF( mycol.EQ.ixbcol ) THEN
600 IF( np.GT.0 ) THEN
601 DO 50 ii = iiw-1, iiw+np-2
602 IF( rwork( ipb+ii ).GT.safe2 ) THEN
603 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
604 $ nz*eps*rwork( ipb+ii )
605 ELSE
606 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
607 $ nz*eps*rwork( ipb+ii ) + safe1
608 END IF
609 50 CONTINUE
610 END IF
611 END IF
612*
613 kase = 0
614 60 CONTINUE
615 IF( mycol.EQ.ixbcol ) THEN
616 CALL cgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
617 $ descw( lld_ ) )
618 ELSE
619 CALL cgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
620 $ descw( lld_ ), myrow, ixbcol )
621 END IF
622 descw( csrc_ ) = mycol
623 CALL pclacon( n, work( ipv ), iw, jw, descw, work( ipr ),
624 $ iw, jw, descw, est, kase )
625 descw( csrc_ ) = ixbcol
626*
627 IF( kase.NE.0 ) THEN
628 IF( kase.EQ.1 ) THEN
629*
630* Multiply by diag(W)*inv(op(sub(A))').
631*
632 CALL pcgetrs( transt, n, 1, af, iaf, jaf, descaf,
633 $ ipiv, work( ipr ), iw, jw, descw, info )
634*
635 IF( mycol.EQ.ixbcol ) THEN
636 IF( np.GT.0 ) THEN
637 DO 70 ii = iiw-1, iiw+np-2
638 work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
639 70 CONTINUE
640 END IF
641 END IF
642 ELSE
643*
644* Multiply by inv(op(sub(A)))*diag(W).
645*
646 IF( mycol.EQ.ixbcol ) THEN
647 IF( np.GT.0 ) THEN
648 DO 80 ii = iiw-1, iiw+np-2
649 work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
650 80 CONTINUE
651 END IF
652 END IF
653*
654 CALL pcgetrs( transn, n, 1, af, iaf, jaf, descaf,
655 $ ipiv, work( ipr ), iw, jw, descw, info )
656 END IF
657 GO TO 60
658 END IF
659*
660* Normalize error.
661*
662 lstres = zero
663 IF( mycol.EQ.ixbcol ) THEN
664 IF( np.GT.0 ) THEN
665 DO 90 ii = iixb, iixb+np-1
666 lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
667 90 CONTINUE
668 END IF
669 CALL sgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
670 $ idum, 1, -1, mycol )
671 IF( lstres.NE.zero )
672 $ ferr( jjfbe ) = est / lstres
673*
674 jjxb = jjxb + 1
675 jjfbe = jjfbe + 1
676 ioffxb = ioffxb + ldxb
677*
678 END IF
679*
680 100 CONTINUE
681*
682 icurcol = mod( ixbcol+1, npcol )
683*
684* Do for each right hand side
685*
686 DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
687 jbrhs = min( jb+nrhs-j, descb( nb_ ) )
688 descw( csrc_ ) = icurcol
689*
690 DO 190 k = 0, jbrhs-1
691*
692 count = 1
693 lstres = three
694 110 CONTINUE
695*
696* Loop until stopping criterion is satisfied.
697*
698* Compute residual R = sub(B) - op(sub(A)) * sub(X),
699* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
700* depending on TRANS.
701*
702 CALL pccopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
703 $ descw, 1 )
704 CALL pcgemv( trans, n, n, -one, a, ia, ja, desca, x,
705 $ ix, j+k, descx, 1, one, work( ipr ), iw, jw,
706 $ descw, 1 )
707*
708* Compute componentwise relative backward error from formula
709*
710* max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) +
711* abs(sub(B)))(i))
712*
713* where abs(Z) is the componentwise absolute value of the
714* matrix or vector Z. If the i-th component of the
715* denominator is less than SAFE2, then SAFE1 is added to the
716* i-th components of the numerator and denominator before
717* dividing.
718*
719 IF( mycol.EQ.icurcol ) THEN
720 IF( np.GT.0 ) THEN
721 DO 120 ii = iixb, iixb+np-1
722 rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
723 120 CONTINUE
724 END IF
725 END IF
726*
727 CALL pcagemv( trans, n, n, rone, a, ia, ja, desca, x, ix,
728 $ j+k, descx, 1, rone, rwork( ipb ), iw, jw,
729 $ descw, 1 )
730*
731 s = zero
732 IF( mycol.EQ.icurcol ) THEN
733 IF( np.GT.0 )THEN
734 DO 130 ii = iiw-1, iiw+np-2
735 IF( rwork( ipb+ii ).GT.safe2 ) THEN
736 s = max( s, cabs1( work( ipr+ii ) ) /
737 $ rwork( ipb+ii ) )
738 ELSE
739 s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
740 $ ( rwork( ipb+ii )+safe1 ) )
741 END IF
742 130 CONTINUE
743 END IF
744 END IF
745*
746 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
747 $ -1, mycol )
748 IF( mycol.EQ.icurcol )
749 $ berr( jjfbe ) = s
750*
751* Test stopping criterion. Continue iterating if
752* 1) The residual BERR(J+K) is larger than machine epsilon,
753* and
754* 2) BERR(J+K) decreased by at least a factor of 2 during
755* the last iteration, and
756* 3) At most ITMAX iterations tried.
757*
758 IF( s.GT.eps .AND. two*s.LE.lstres .AND.
759 $ count.LE.itmax ) THEN
760*
761* Update solution and try again.
762*
763 CALL pcgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
764 $ work( ipr ), iw, jw, descw, info )
765 CALL pcaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
766 $ ix, j+k, descx, 1 )
767 lstres = s
768 count = count + 1
769 GO TO 110
770 END IF
771*
772* Bound error from formula
773*
774* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
775* norm( abs(inv(op(sub(A))))*
776* ( abs(R) + NZ*EPS*(
777* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
778*
779* where
780* norm(Z) is the magnitude of the largest component of Z
781* inv(op(sub(A))) is the inverse of op(sub(A))
782* abs(Z) is the componentwise absolute value of the matrix
783* or vector Z
784* NZ is the maximum number of nonzeros in any row of sub(A),
785* plus 1
786* EPS is machine epsilon
787*
788* The i-th component of
789* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
790* is incremented by SAFE1 if the i-th component of
791* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
792* SAFE2.
793*
794* Use PCLACON to estimate the infinity-norm of the matrix
795* inv(op(sub(A))) * diag(W), where
796* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
797*
798 IF( mycol.EQ.icurcol ) THEN
799 IF( np.GT.0 ) THEN
800 DO 140 ii = iiw-1, iiw+np-2
801 IF( rwork( ipb+ii ).GT.safe2 ) THEN
802 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
803 $ nz*eps*rwork( ipb+ii )
804 ELSE
805 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
806 $ nz*eps*rwork( ipb+ii ) + safe1
807 END IF
808 140 CONTINUE
809 END IF
810 END IF
811*
812 kase = 0
813 150 CONTINUE
814 IF( mycol.EQ.icurcol ) THEN
815 CALL cgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
816 $ descw( lld_ ) )
817 ELSE
818 CALL cgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
819 $ descw( lld_ ), myrow, icurcol )
820 END IF
821 descw( csrc_ ) = mycol
822 CALL pclacon( n, work( ipv ), iw, jw, descw, work( ipr ),
823 $ iw, jw, descw, est, kase )
824 descw( csrc_ ) = icurcol
825*
826 IF( kase.NE.0 ) THEN
827 IF( kase.EQ.1 ) THEN
828*
829* Multiply by diag(W)*inv(op(sub(A))').
830*
831 CALL pcgetrs( transt, n, 1, af, iaf, jaf, descaf,
832 $ ipiv, work( ipr ), iw, jw, descw, info )
833*
834 IF( mycol.EQ.icurcol ) THEN
835 IF( np.GT.0 ) THEN
836 DO 160 ii = iiw-1, iiw+np-2
837 work( ipr+ii ) = rwork( ipb+ii )*
838 $ work( ipr+ii )
839 160 CONTINUE
840 END IF
841 END IF
842 ELSE
843*
844* Multiply by inv(op(sub(A)))*diag(W).
845*
846 IF( mycol.EQ.icurcol ) THEN
847 IF( np.GT.0 ) THEN
848 DO 170 ii = iiw-1, iiw+np-2
849 work( ipr+ii ) = rwork( ipb+ii )*
850 $ work( ipr+ii )
851 170 CONTINUE
852 END IF
853 END IF
854*
855 CALL pcgetrs( transn, n, 1, af, iaf, jaf, descaf,
856 $ ipiv, work( ipr ), iw, jw, descw,
857 $ info )
858 END IF
859 GO TO 150
860 END IF
861*
862* Normalize error.
863*
864 lstres = zero
865 IF( mycol.EQ.icurcol ) THEN
866 IF( np.GT.0 ) THEN
867 DO 180 ii = iixb, iixb+np-1
868 lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
869 180 CONTINUE
870 END IF
871 CALL sgamx2d( ictxt, 'Column', ' ', 1, 1, lstres,
872 $ 1, idum, idum, 1, -1, mycol )
873 IF( lstres.NE.zero )
874 $ ferr( jjfbe ) = est / lstres
875*
876 jjxb = jjxb + 1
877 jjfbe = jjfbe + 1
878 ioffxb = ioffxb + ldxb
879*
880 END IF
881*
882 190 CONTINUE
883*
884 icurcol = mod( icurcol+1, npcol )
885*
886 200 CONTINUE
887*
888 work( 1 ) = cmplx( real( lwmin ) )
889 rwork( 1 ) = real( lrwmin )
890*
891 RETURN
892*
893* End of PCGERFS
894*
895 END
float cmplx[2]
Definition pblas.h:136
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pcgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, rwork, lrwork, info)
Definition pcgerfs.f:5
subroutine pcgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pcgetrs.f:3
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pclacon(n, v, iv, jv, descv, x, ix, jx, descx, est, kase)
Definition pclacon.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2