SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pzgerfs.f
Go to the documentation of this file.
1 SUBROUTINE pzgerfs( TRANS, N, NRHS, A, IA, JA, DESCA, AF, IAF,
2 $ JAF, DESCAF, IPIV, B, IB, JB, DESCB, X, IX,
3 $ JX, DESCX, FERR, BERR, WORK, LWORK, RWORK,
4 $ LRWORK, INFO )
5*
6* -- ScaLAPACK routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* November 15, 1997
10*
11* .. Scalar Arguments ..
12 CHARACTER TRANS
13 INTEGER IA, IAF, IB, IX, INFO, JA, JAF, JB, JX,
14 $ LRWORK, LWORK, N, NRHS
15* ..
16* .. Array Arguments ..
17 INTEGER DESCA( * ), DESCAF( * ), DESCB( * ),
18 $ DESCX( * ), IPIV( * )
19 DOUBLE PRECISION BERR( * ), FERR( * ), RWORK( * )
20 COMPLEX*16 A( * ), AF( * ), B( * ), WORK( * ), X( * )
21* ..
22*
23* Purpose
24* =======
25*
26* PZGERFS improves the computed solution to a system of linear
27* equations and provides error bounds and backward error estimates for
28* the solutions.
29*
30* Notes
31* =====
32*
33* Each global data object is described by an associated description
34* vector. This vector stores the information required to establish
35* the mapping between an object element and its corresponding process
36* and memory location.
37*
38* Let A be a generic term for any 2D block cyclicly distributed array.
39* Such a global array has an associated description vector DESCA.
40* In the following comments, the character _ should be read as
41* "of the global array".
42*
43* NOTATION STORED IN EXPLANATION
44* --------------- -------------- --------------------------------------
45* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
46* DTYPE_A = 1.
47* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
48* the BLACS process grid A is distribu-
49* ted over. The context itself is glo-
50* bal, but the handle (the integer
51* value) may vary.
52* M_A (global) DESCA( M_ ) The number of rows in the global
53* array A.
54* N_A (global) DESCA( N_ ) The number of columns in the global
55* array A.
56* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
57* the rows of the array.
58* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
59* the columns of the array.
60* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
61* row of the array A is distributed.
62* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
63* first column of the array A is
64* distributed.
65* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
66* array. LLD_A >= MAX(1,LOCr(M_A)).
67*
68* Let K be the number of rows or columns of a distributed matrix,
69* and assume that its process grid has dimension p x q.
70* LOCr( K ) denotes the number of elements of K that a process
71* would receive if K were distributed over the p processes of its
72* process column.
73* Similarly, LOCc( K ) denotes the number of elements of K that a
74* process would receive if K were distributed over the q processes of
75* its process row.
76* The values of LOCr() and LOCc() may be determined via a call to the
77* ScaLAPACK tool function, NUMROC:
78* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
79* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
80* An upper bound for these quantities may be computed by:
81* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
82* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
83*
84* In the following comments, sub( A ), sub( X ) and sub( B ) denote
85* respectively A(IA:IA+N-1,JA:JA+N-1), X(IX:IX+N-1,JX:JX+NRHS-1) and
86* B(IB:IB+N-1,JB:JB+NRHS-1).
87*
88* Arguments
89* =========
90*
91* TRANS (global input) CHARACTER*1
92* Specifies the form of the system of equations.
93* = 'N': sub( A ) * sub( X ) = sub( B ) (No transpose)
94* = 'T': sub( A )**T * sub( X ) = sub( B ) (Transpose)
95* = 'C': sub( A )**H * sub( X ) = sub( B )
96* (Conjugate transpose)
97*
98* N (global input) INTEGER
99* The order of the matrix sub( A ). N >= 0.
100*
101* NRHS (global input) INTEGER
102* The number of right hand sides, i.e., the number of columns
103* of the matrices sub( B ) and sub( X ). NRHS >= 0.
104*
105* A (local input) COMPLEX*16 pointer into the local
106* memory to an array of local dimension (LLD_A,LOCc(JA+N-1)).
107* This array contains the local pieces of the distributed
108* matrix sub( A ).
109*
110* IA (global input) INTEGER
111* The row index in the global array A indicating the first
112* row of sub( A ).
113*
114* JA (global input) INTEGER
115* The column index in the global array A indicating the
116* first column of sub( A ).
117*
118* DESCA (global and local input) INTEGER array of dimension DLEN_.
119* The array descriptor for the distributed matrix A.
120*
121* AF (local input) COMPLEX*16 pointer into the local
122* memory to an array of local dimension (LLD_AF,LOCc(JA+N-1)).
123* This array contains the local pieces of the distributed
124* factors of the matrix sub( A ) = P * L * U as computed by
125* PZGETRF.
126*
127* IAF (global input) INTEGER
128* The row index in the global array AF indicating the first
129* row of sub( AF ).
130*
131* JAF (global input) INTEGER
132* The column index in the global array AF indicating the
133* first column of sub( AF ).
134*
135* DESCAF (global and local input) INTEGER array of dimension DLEN_.
136* The array descriptor for the distributed matrix AF.
137*
138* IPIV (local input) INTEGER array of dimension LOCr(M_AF)+MB_AF.
139* This array contains the pivoting information as computed
140* by PZGETRF. IPIV(i) -> The global row local row i
141* was swapped with. This array is tied to the distributed
142* matrix A.
143*
144* B (local input) COMPLEX*16 pointer into the local
145* memory to an array of local dimension
146* (LLD_B,LOCc(JB+NRHS-1)). This array contains the local
147* pieces of the distributed matrix of right hand sides
148* sub( B ).
149*
150* IB (global input) INTEGER
151* The row index in the global array B indicating the first
152* row of sub( B ).
153*
154* JB (global input) INTEGER
155* The column index in the global array B indicating the
156* first column of sub( B ).
157*
158* DESCB (global and local input) INTEGER array of dimension DLEN_.
159* The array descriptor for the distributed matrix B.
160*
161* X (local input and output) COMPLEX*16 pointer into the
162* local memory to an array of local dimension
163* (LLD_X,LOCc(JX+NRHS-1)). On entry, this array contains
164* the local pieces of the distributed matrix solution
165* sub( X ). On exit, the improved solution vectors.
166*
167* IX (global input) INTEGER
168* The row index in the global array X indicating the first
169* row of sub( X ).
170*
171* JX (global input) INTEGER
172* The column index in the global array X indicating the
173* first column of sub( X ).
174*
175* DESCX (global and local input) INTEGER array of dimension DLEN_.
176* The array descriptor for the distributed matrix X.
177*
178* FERR (local output) DOUBLE PRECISION array of local dimension
179* LOCc(JB+NRHS-1).
180* The estimated forward error bound for each solution vector
181* of sub( X ). If XTRUE is the true solution corresponding
182* to sub( X ), FERR is an estimated upper bound for the
183* magnitude of the largest element in (sub( X ) - XTRUE)
184* divided by the magnitude of the largest element in sub( X ).
185* The estimate is as reliable as the estimate for RCOND, and
186* is almost always a slight overestimate of the true error.
187* This array is tied to the distributed matrix X.
188*
189* BERR (local output) DOUBLE PRECISION array of local dimension
190* LOCc(JB+NRHS-1). The componentwise relative backward
191* error of each solution vector (i.e., the smallest re-
192* lative change in any entry of sub( A ) or sub( B )
193* that makes sub( X ) an exact solution).
194* This array is tied to the distributed matrix X.
195*
196* WORK (local workspace/local output) COMPLEX*16 array,
197* dimension (LWORK)
198* On exit, WORK(1) returns the minimal and optimal LWORK.
199*
200* LWORK (local or global input) INTEGER
201* The dimension of the array WORK.
202* LWORK is local input and must be at least
203* LWORK >= 2*LOCr( N + MOD(IA-1,MB_A) )
204*
205* If LWORK = -1, then LWORK is global input and a workspace
206* query is assumed; the routine only calculates the minimum
207* and optimal size for all work arrays. Each of these
208* values is returned in the first entry of the corresponding
209* work array, and no error message is issued by PXERBLA.
210*
211* RWORK (local workspace/local output) DOUBLE PRECISION array,
212* dimension (LRWORK)
213* On exit, RWORK(1) returns the minimal and optimal LRWORK.
214*
215* LRWORK (local or global input) INTEGER
216* The dimension of the array RWORK.
217* LRWORK is local input and must be at least
218* LRWORK >= LOCr( N + MOD(IB-1,MB_B) ).
219*
220* If LRWORK = -1, then LRWORK is global input and a workspace
221* query is assumed; the routine only calculates the minimum
222* and optimal size for all work arrays. Each of these
223* values is returned in the first entry of the corresponding
224* work array, and no error message is issued by PXERBLA.
225*
226*
227* INFO (global output) INTEGER
228* = 0: successful exit
229* < 0: If the i-th argument is an array and the j-entry had
230* an illegal value, then INFO = -(i*100+j), if the i-th
231* argument is a scalar and had an illegal value, then
232* INFO = -i.
233*
234* Internal Parameters
235* ===================
236*
237* ITMAX is the maximum number of steps of iterative refinement.
238*
239* Notes
240* =====
241*
242* This routine temporarily returns when N <= 1.
243*
244* The distributed submatrices op( A ) and op( AF ) (respectively
245* sub( X ) and sub( B ) ) should be distributed the same way on the
246* same processes. These conditions ensure that sub( A ) and sub( AF )
247* (resp. sub( X ) and sub( B ) ) are "perfectly" aligned.
248*
249* Moreover, this routine requires the distributed submatrices sub( A ),
250* sub( AF ), sub( X ), and sub( B ) to be aligned on a block boundary,
251* i.e., if f(x,y) = MOD( x-1, y ):
252* f( IA, DESCA( MB_ ) ) = f( JA, DESCA( NB_ ) ) = 0,
253* f( IAF, DESCAF( MB_ ) ) = f( JAF, DESCAF( NB_ ) ) = 0,
254* f( IB, DESCB( MB_ ) ) = f( JB, DESCB( NB_ ) ) = 0, and
255* f( IX, DESCX( MB_ ) ) = f( JX, DESCX( NB_ ) ) = 0.
256*
257* =====================================================================
258*
259* .. Parameters ..
260 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
261 $ LLD_, MB_, M_, NB_, N_, RSRC_
262 PARAMETER ( BLOCK_CYCLIC_2D = 1, dlen_ = 9, dtype_ = 1,
263 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
264 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
265 INTEGER ITMAX
266 PARAMETER ( ITMAX = 5 )
267 double precision zero, rone, two, three
268 parameter( zero = 0.0d+0, rone = 1.0d+0, two = 2.0d+0,
269 $ three = 3.0d+0 )
270 COMPLEX*16 ONE
271 parameter( one = ( 1.0d+0, 0.0d+0 ) )
272* ..
273* .. Local Scalars ..
274 LOGICAL LQUERY, NOTRAN
275 CHARACTER TRANSN, TRANST
276 INTEGER COUNT, IACOL, IAFCOL, IAFROW, IAROW, IXBCOL,
277 $ ixbrow, ixcol, ixrow, icoffa, icoffaf, icoffb,
278 $ icoffx, ictxt, icurcol, idum, ii, iixb, iiw,
279 $ ioffxb, ipb, ipr, ipv, iroffa, iroffaf, iroffb,
280 $ iroffx, iw, j, jbrhs, jj, jjfbe, jjxb, jn, jw,
281 $ k, kase, ldxb, lrwmin, lwmin, mycol, myrhs,
282 $ myrow, np, np0, npcol, npmod, nprow, nz
283 DOUBLE PRECISION EPS, EST, LSTRES, S, SAFE1, SAFE2, SAFMIN
284 COMPLEX*16 ZDUM
285* ..
286* .. Local Arrays ..
287 INTEGER DESCW( DLEN_ ), IDUM1( 5 ), IDUM2( 5 )
288* ..
289* .. External Functions ..
290 LOGICAL LSAME
291 INTEGER ICEIL, INDXG2P, NUMROC
292 DOUBLE PRECISION PDLAMCH
293 EXTERNAL iceil, indxg2p, lsame, numroc, pdlamch
294* ..
295* .. External Subroutines ..
296 EXTERNAL blacs_gridinfo, chk1mat, descset, dgamx2d,
297 $ infog2l, pchk2mat, pxerbla, pzagemv, pzaxpy,
298 $ pzcopy, pzgemv, pzgetrs, pzlacon,
299 $ zgebr2d, zgebs2d
300* ..
301* .. Intrinsic Functions ..
302 INTRINSIC abs, dble, dcmplx, dimag, ichar, max, min, mod
303* ..
304* .. Statement Functions ..
305 DOUBLE PRECISION CABS1
306* ..
307* .. Statement Function definitions ..
308 cabs1( zdum ) = abs( dble( zdum ) ) + abs( dimag( zdum ) )
309* ..
310* .. Executable Statements ..
311*
312* Get grid parameters
313*
314 ictxt = desca( ctxt_ )
315 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
316*
317* Test the input parameters.
318*
319 notran = lsame( trans, 'N' )
320*
321 info = 0
322 IF( nprow.EQ.-1 ) THEN
323 info = -(700+ctxt_)
324 ELSE
325 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 7, info )
326 CALL chk1mat( n, 2, n, 2, iaf, jaf, descaf, 11, info )
327 CALL chk1mat( n, 2, nrhs, 3, ib, jb, descb, 16, info )
328 CALL chk1mat( n, 2, nrhs, 3, ix, jx, descx, 20, info )
329 IF( info.EQ.0 ) THEN
330 iroffa = mod( ia-1, desca( mb_ ) )
331 icoffa = mod( ja-1, desca( nb_ ) )
332 iroffaf = mod( iaf-1, descaf( mb_ ) )
333 icoffaf = mod( jaf-1, descaf( nb_ ) )
334 iroffb = mod( ib-1, descb( mb_ ) )
335 icoffb = mod( jb-1, descb( nb_ ) )
336 iroffx = mod( ix-1, descx( mb_ ) )
337 icoffx = mod( jx-1, descx( nb_ ) )
338 iarow = indxg2p( ia, desca( mb_ ), myrow, desca( rsrc_ ),
339 $ nprow )
340 iafcol = indxg2p( jaf, descaf( nb_ ), mycol,
341 $ descaf( csrc_ ), npcol )
342 iafrow = indxg2p( iaf, descaf( mb_ ), myrow,
343 $ descaf( rsrc_ ), nprow )
344 iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
345 $ npcol )
346 CALL infog2l( ib, jb, descb, nprow, npcol, myrow, mycol,
347 $ iixb, jjxb, ixbrow, ixbcol )
348 ixrow = indxg2p( ix, descx( mb_ ), myrow, descx( rsrc_ ),
349 $ nprow )
350 ixcol = indxg2p( jx, descx( nb_ ), mycol, descx( csrc_ ),
351 $ npcol )
352 npmod = numroc( n+iroffa, desca( mb_ ), myrow, iarow,
353 $ nprow )
354 lwmin = 2 * npmod
355 lrwmin = npmod
356 work( 1 ) = dcmplx( dble( lwmin ) )
357 rwork( 1 ) = dble( lrwmin )
358 lquery = ( lwork.EQ.-1 .OR. lrwork.EQ.-1 )
359*
360 IF( ( .NOT.notran ) .AND. ( .NOT.lsame( trans, 'T' ) ) .AND.
361 $ ( .NOT.lsame( trans, 'C' ) ) ) THEN
362 info = -1
363 ELSE IF( n.LT.0 ) THEN
364 info = -2
365 ELSE IF( nrhs.LT.0 ) THEN
366 info = -3
367 ELSE IF( iroffa.NE.0 ) THEN
368 info = -5
369 ELSE IF( icoffa.NE.0 ) THEN
370 info = -6
371 ELSE IF( desca( mb_ ).NE.desca( nb_ ) ) THEN
372 info = -( 700 + nb_ )
373 ELSE IF( desca( mb_ ).NE.descaf( mb_ ) ) THEN
374 info = -( 1100 + mb_ )
375 ELSE IF( iroffaf.NE.0 .OR. iarow.NE.iafrow ) THEN
376 info = -9
377 ELSE IF( desca( nb_ ).NE.descaf( nb_ ) ) THEN
378 info = -( 1100 + nb_ )
379 ELSE IF( icoffaf.NE.0 .OR. iacol.NE.iafcol ) THEN
380 info = -10
381 ELSE IF( ictxt.NE.descaf( ctxt_ ) ) THEN
382 info = -( 1100 + ctxt_ )
383 ELSE IF( iroffa.NE.iroffb .OR. iarow.NE.ixbrow ) THEN
384 info = -14
385 ELSE IF( desca( mb_ ).NE.descb( mb_ ) ) THEN
386 info = -( 1600 + mb_ )
387 ELSE IF( ictxt.NE.descb( ctxt_ ) ) THEN
388 info = -( 1600 + ctxt_ )
389 ELSE IF( descb( mb_ ).NE.descx( mb_ ) ) THEN
390 info = -( 2000 + mb_ )
391 ELSE IF( iroffx.NE.0 .OR. ixbrow.NE.ixrow ) THEN
392 info = -18
393 ELSE IF( descb( nb_ ).NE.descx( nb_ ) ) THEN
394 info = -( 2000 + nb_ )
395 ELSE IF( icoffb.NE.icoffx .OR. ixbcol.NE.ixcol ) THEN
396 info = -19
397 ELSE IF( ictxt.NE.descx( ctxt_ ) ) THEN
398 info = -( 2000 + ctxt_ )
399 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
400 info = -24
401 ELSE IF( lrwork.LT.lrwmin .AND. .NOT.lquery ) THEN
402 info = -26
403 END IF
404 END IF
405*
406 IF( notran ) THEN
407 idum1( 1 ) = ichar( 'N' )
408 ELSE IF( lsame( trans, 'T' ) ) THEN
409 idum1( 1 ) = ichar( 'T' )
410 ELSE
411 idum1( 1 ) = ichar( 'C' )
412 END IF
413 idum2( 1 ) = 1
414 idum1( 2 ) = n
415 idum2( 2 ) = 2
416 idum1( 3 ) = nrhs
417 idum2( 3 ) = 3
418 IF( lwork.EQ.-1 ) THEN
419 idum1( 4 ) = -1
420 ELSE
421 idum1( 4 ) = 1
422 END IF
423 idum2( 4 ) = 24
424 IF( lrwork.EQ.-1 ) THEN
425 idum1( 5 ) = -1
426 ELSE
427 idum1( 5 ) = 1
428 END IF
429 idum2( 5 ) = 26
430 CALL pchk2mat( n, 2, n, 2, ia, ja, desca, 7, n, 2, n, 2, iaf,
431 $ jaf, descaf, 11, 5, idum1, idum2, info )
432 CALL pchk2mat( n, 2, nrhs, 3, ib, jb, descb, 16, n, 2, nrhs, 3,
433 $ ix, jx, descx, 20, 5, idum1, idum2, info )
434 END IF
435 IF( info.NE.0 ) THEN
436 CALL pxerbla( ictxt, 'PZGERFS', -info )
437 RETURN
438 ELSE IF( lquery ) THEN
439 RETURN
440 END IF
441*
442 jjfbe = jjxb
443 myrhs = numroc( jb+nrhs-1, descb( nb_ ), mycol, descb( csrc_ ),
444 $ npcol )
445*
446* Quick return if possible
447*
448 IF( n.LE.1 .OR. nrhs.EQ.0 ) THEN
449 DO 10 jj = jjfbe, myrhs
450 ferr( jj ) = zero
451 berr( jj ) = zero
452 10 CONTINUE
453 RETURN
454 END IF
455*
456 IF( notran ) THEN
457 transn = 'N'
458 transt = 'C'
459 ELSE
460 transn = 'C'
461 transt = 'N'
462 END IF
463*
464 np0 = numroc( n+iroffb, descb( mb_ ), myrow, ixbrow, nprow )
465 CALL descset( descw, n+iroffb, 1, desca( mb_ ), 1, ixbrow, ixbcol,
466 $ ictxt, max( 1, np0 ) )
467 ipb = 1
468 ipr = 1
469 ipv = ipr + np0
470 IF( myrow.EQ.ixbrow ) THEN
471 iiw = 1 + iroffb
472 np = np0 - iroffb
473 ELSE
474 iiw = 1
475 np = np0
476 END IF
477 iw = 1 + iroffb
478 jw = 1
479 ldxb = descb( lld_ )
480 ioffxb = ( jjxb-1 )*ldxb
481*
482* NZ = 1 + maximum number of nonzero entries in each row of sub( A )
483*
484 nz = n + 1
485 eps = pdlamch( ictxt, 'Epsilon' )
486 safmin = pdlamch( ictxt, 'Safe minimum' )
487 safe1 = nz*safmin
488 safe2 = safe1 / eps
489 jn = min( iceil( jb, descb( nb_ ) ) * descb( nb_ ), jb+nrhs-1 )
490*
491* Handle first block separately
492*
493 jbrhs = jn - jb + 1
494 DO 100 k = 0, jbrhs-1
495*
496 count = 1
497 lstres = three
498 20 CONTINUE
499*
500* Loop until stopping criterion is satisfied.
501*
502* Compute residual R = sub(B) - op(sub(A)) * sub(X),
503* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
504* depending on TRANS.
505*
506 CALL pzcopy( n, b, ib, jb+k, descb, 1, work( ipr ), iw, jw,
507 $ descw, 1 )
508 CALL pzgemv( trans, n, n, -one, a, ia, ja, desca, x, ix,
509 $ jx+k, descx, 1, one, work( ipr ), iw, jw,
510 $ descw, 1 )
511*
512* Compute componentwise relative backward error from formula
513*
514* max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
515*
516* where abs(Z) is the componentwise absolute value of the
517* matrix or vector Z. If the i-th component of the
518* denominator is less than SAFE2, then SAFE1 is added to the
519* i-th components of the numerator and denominator before
520* dividing.
521*
522 IF( mycol.EQ.ixbcol ) THEN
523 IF( np.GT.0 ) THEN
524 DO 30 ii = iixb, iixb + np - 1
525 rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
526 30 CONTINUE
527 END IF
528 END IF
529*
530 CALL pzagemv( trans, n, n, rone, a, ia, ja, desca, x, ix, jx+k,
531 $ descx, 1, rone, rwork( ipb ), iw, jw, descw, 1 )
532*
533 s = zero
534 IF( mycol.EQ.ixbcol ) THEN
535 IF( np.GT.0 ) THEN
536 DO 40 ii = iiw-1, iiw+np-2
537 IF( rwork( ipb+ii ).GT.safe2 ) THEN
538 s = max( s, cabs1( work( ipr+ii ) ) /
539 $ rwork( ipb+ii ) )
540 ELSE
541 s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
542 $ ( rwork( ipb+ii )+safe1 ) )
543 END IF
544 40 CONTINUE
545 END IF
546 END IF
547*
548 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
549 $ -1, mycol )
550 IF( mycol.EQ.ixbcol )
551 $ berr( jjfbe ) = s
552*
553* Test stopping criterion. Continue iterating if
554* 1) The residual BERR(J+K) is larger than machine epsilon,
555* and
556* 2) BERR(J+K) decreased by at least a factor of 2 during the
557* last iteration, and
558* 3) At most ITMAX iterations tried.
559*
560 IF( s.GT.eps .AND. two*s.LE.lstres .AND. count.LE.itmax ) THEN
561*
562* Update solution and try again.
563*
564 CALL pzgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
565 $ work( ipr ), iw, jw, descw, info )
566 CALL pzaxpy( n, one, work( ipr ), iw, jw, descw, 1, x, ix,
567 $ jx+k, descx, 1 )
568 lstres = s
569 count = count + 1
570 GO TO 20
571 END IF
572*
573* Bound error from formula
574*
575* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
576* norm( abs(inv(op(sub(A))))*
577* ( abs(R) + NZ*EPS*(
578* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
579*
580* where
581* norm(Z) is the magnitude of the largest component of Z
582* inv(op(sub(A))) is the inverse of op(sub(A))
583* abs(Z) is the componentwise absolute value of the matrix
584* or vector Z
585* NZ is the maximum number of nonzeros in any row of sub(A),
586* plus 1
587* EPS is machine epsilon
588*
589* The i-th component of
590* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
591* is incremented by SAFE1 if the i-th component of
592* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
593* SAFE2.
594*
595* Use PZLACON to estimate the infinity-norm of the matrix
596* inv(op(sub(A))) * diag(W), where
597* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
598*
599 IF( mycol.EQ.ixbcol ) THEN
600 IF( np.GT.0 ) THEN
601 DO 50 ii = iiw-1, iiw+np-2
602 IF( rwork( ipb+ii ).GT.safe2 ) THEN
603 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
604 $ nz*eps*rwork( ipb+ii )
605 ELSE
606 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
607 $ nz*eps*rwork( ipb+ii ) + safe1
608 END IF
609 50 CONTINUE
610 END IF
611 END IF
612*
613 kase = 0
614 60 CONTINUE
615 IF( mycol.EQ.ixbcol ) THEN
616 CALL zgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
617 $ descw( lld_ ) )
618 ELSE
619 CALL zgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
620 $ descw( lld_ ), myrow, ixbcol )
621 END IF
622 descw( csrc_ ) = mycol
623 CALL pzlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
624 $ iw, jw, descw, est, kase )
625 descw( csrc_ ) = ixbcol
626*
627 IF( kase.NE.0 ) THEN
628 IF( kase.EQ.1 ) THEN
629*
630* Multiply by diag(W)*inv(op(sub(A))').
631*
632 CALL pzgetrs( transt, n, 1, af, iaf, jaf, descaf,
633 $ ipiv, work( ipr ), iw, jw, descw, info )
634*
635 IF( mycol.EQ.ixbcol ) THEN
636 IF( np.GT.0 ) THEN
637 DO 70 ii = iiw-1, iiw+np-2
638 work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
639 70 CONTINUE
640 END IF
641 END IF
642 ELSE
643*
644* Multiply by inv(op(sub(A)))*diag(W).
645*
646 IF( mycol.EQ.ixbcol ) THEN
647 IF( np.GT.0 ) THEN
648 DO 80 ii = iiw-1, iiw+np-2
649 work( ipr+ii ) = rwork( ipb+ii )*work( ipr+ii )
650 80 CONTINUE
651 END IF
652 END IF
653*
654 CALL pzgetrs( transn, n, 1, af, iaf, jaf, descaf,
655 $ ipiv, work( ipr ), iw, jw, descw, info )
656 END IF
657 GO TO 60
658 END IF
659*
660* Normalize error.
661*
662 lstres = zero
663 IF( mycol.EQ.ixbcol ) THEN
664 IF( np.GT.0 ) THEN
665 DO 90 ii = iixb, iixb+np-1
666 lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
667 90 CONTINUE
668 END IF
669 CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres, 1, idum,
670 $ idum, 1, -1, mycol )
671 IF( lstres.NE.zero )
672 $ ferr( jjfbe ) = est / lstres
673*
674 jjxb = jjxb + 1
675 jjfbe = jjfbe + 1
676 ioffxb = ioffxb + ldxb
677*
678 END IF
679*
680 100 CONTINUE
681*
682 icurcol = mod( ixbcol+1, npcol )
683*
684* Do for each right hand side
685*
686 DO 200 j = jn+1, jb+nrhs-1, descb( nb_ )
687 jbrhs = min( jb+nrhs-j, descb( nb_ ) )
688 descw( csrc_ ) = icurcol
689*
690 DO 190 k = 0, jbrhs-1
691*
692 count = 1
693 lstres = three
694 110 CONTINUE
695*
696* Loop until stopping criterion is satisfied.
697*
698* Compute residual R = sub(B) - op(sub(A)) * sub(X),
699* where op(sub(A)) = sub(A), or sub(A)' (A**T or A**H),
700* depending on TRANS.
701*
702 CALL pzcopy( n, b, ib, j+k, descb, 1, work( ipr ), iw, jw,
703 $ descw, 1 )
704 CALL pzgemv( trans, n, n, -one, a, ia, ja, desca, x,
705 $ ix, j+k, descx, 1, one, work( ipr ), iw, jw,
706 $ descw, 1 )
707*
708* Compute componentwise relative backward error from formula
709*
710* max(i) (abs(R(i))/(abs(op(sub(A)))*abs(sub(X)) +
711* abs(sub(B)))(i))
712*
713* where abs(Z) is the componentwise absolute value of the
714* matrix or vector Z. If the i-th component of the
715* denominator is less than SAFE2, then SAFE1 is added to the
716* i-th components of the numerator and denominator before
717* dividing.
718*
719 IF( mycol.EQ.icurcol ) THEN
720 IF( np.GT.0 ) THEN
721 DO 120 ii = iixb, iixb+np-1
722 rwork( iiw+ii-iixb ) = cabs1( b( ii+ioffxb ) )
723 120 CONTINUE
724 END IF
725 END IF
726*
727 CALL pzagemv( trans, n, n, rone, a, ia, ja, desca, x, ix,
728 $ j+k, descx, 1, rone, rwork( ipb ), iw, jw,
729 $ descw, 1 )
730*
731 s = zero
732 IF( mycol.EQ.icurcol ) THEN
733 IF( np.GT.0 )THEN
734 DO 130 ii = iiw-1, iiw+np-2
735 IF( rwork( ipb+ii ).GT.safe2 ) THEN
736 s = max( s, cabs1( work( ipr+ii ) ) /
737 $ rwork( ipb+ii ) )
738 ELSE
739 s = max( s, ( cabs1( work( ipr+ii ) )+safe1 ) /
740 $ ( rwork( ipb+ii )+safe1 ) )
741 END IF
742 130 CONTINUE
743 END IF
744 END IF
745*
746 CALL dgamx2d( ictxt, 'All', ' ', 1, 1, s, 1, idum, idum, 1,
747 $ -1, mycol )
748 IF( mycol.EQ.icurcol )
749 $ berr( jjfbe ) = s
750*
751* Test stopping criterion. Continue iterating if
752* 1) The residual BERR(J+K) is larger than machine epsilon,
753* and
754* 2) BERR(J+K) decreased by at least a factor of 2 during
755* the last iteration, and
756* 3) At most ITMAX iterations tried.
757*
758 IF( s.GT.eps .AND. two*s.LE.lstres .AND.
759 $ count.LE.itmax ) THEN
760*
761* Update solution and try again.
762*
763 CALL pzgetrs( trans, n, 1, af, iaf, jaf, descaf, ipiv,
764 $ work( ipr ), iw, jw, descw, info )
765 CALL pzaxpy( n, one, work( ipr ), iw, jw, descw, 1, x,
766 $ ix, j+k, descx, 1 )
767 lstres = s
768 count = count + 1
769 GO TO 110
770 END IF
771*
772* Bound error from formula
773*
774* norm(sub(X) - XTRUE) / norm(sub(X)) .le. FERR =
775* norm( abs(inv(op(sub(A))))*
776* ( abs(R) + NZ*EPS*(
777* abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))))/norm(sub(X))
778*
779* where
780* norm(Z) is the magnitude of the largest component of Z
781* inv(op(sub(A))) is the inverse of op(sub(A))
782* abs(Z) is the componentwise absolute value of the matrix
783* or vector Z
784* NZ is the maximum number of nonzeros in any row of sub(A),
785* plus 1
786* EPS is machine epsilon
787*
788* The i-th component of
789* abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B)))
790* is incremented by SAFE1 if the i-th component of
791* abs(op(sub(A)))*abs(sub(X)) + abs(sub(B)) is less than
792* SAFE2.
793*
794* Use PZLACON to estimate the infinity-norm of the matrix
795* inv(op(sub(A))) * diag(W), where
796* W = abs(R)+NZ*EPS*(abs(op(sub(A)))*abs(sub(X))+abs(sub(B))).
797*
798 IF( mycol.EQ.icurcol ) THEN
799 IF( np.GT.0 ) THEN
800 DO 140 ii = iiw-1, iiw+np-2
801 IF( rwork( ipb+ii ).GT.safe2 ) THEN
802 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
803 $ nz*eps*rwork( ipb+ii )
804 ELSE
805 rwork( ipb+ii ) = cabs1( work( ipr+ii ) ) +
806 $ nz*eps*rwork( ipb+ii ) + safe1
807 END IF
808 140 CONTINUE
809 END IF
810 END IF
811*
812 kase = 0
813 150 CONTINUE
814 IF( mycol.EQ.icurcol ) THEN
815 CALL zgebs2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
816 $ descw( lld_ ) )
817 ELSE
818 CALL zgebr2d( ictxt, 'Rowwise', ' ', np, 1, work( ipr ),
819 $ descw( lld_ ), myrow, icurcol )
820 END IF
821 descw( csrc_ ) = mycol
822 CALL pzlacon( n, work( ipv ), iw, jw, descw, work( ipr ),
823 $ iw, jw, descw, est, kase )
824 descw( csrc_ ) = icurcol
825*
826 IF( kase.NE.0 ) THEN
827 IF( kase.EQ.1 ) THEN
828*
829* Multiply by diag(W)*inv(op(sub(A))').
830*
831 CALL pzgetrs( transt, n, 1, af, iaf, jaf, descaf,
832 $ ipiv, work( ipr ), iw, jw, descw, info )
833*
834 IF( mycol.EQ.icurcol ) THEN
835 IF( np.GT.0 ) THEN
836 DO 160 ii = iiw-1, iiw+np-2
837 work( ipr+ii ) = rwork( ipb+ii )*
838 $ work( ipr+ii )
839 160 CONTINUE
840 END IF
841 END IF
842 ELSE
843*
844* Multiply by inv(op(sub(A)))*diag(W).
845*
846 IF( mycol.EQ.icurcol ) THEN
847 IF( np.GT.0 ) THEN
848 DO 170 ii = iiw-1, iiw+np-2
849 work( ipr+ii ) = rwork( ipb+ii )*
850 $ work( ipr+ii )
851 170 CONTINUE
852 END IF
853 END IF
854*
855 CALL pzgetrs( transn, n, 1, af, iaf, jaf, descaf,
856 $ ipiv, work( ipr ), iw, jw, descw,
857 $ info )
858 END IF
859 GO TO 150
860 END IF
861*
862* Normalize error.
863*
864 lstres = zero
865 IF( mycol.EQ.icurcol ) THEN
866 IF( np.GT.0 ) THEN
867 DO 180 ii = iixb, iixb+np-1
868 lstres = max( lstres, cabs1( x( ioffxb+ii ) ) )
869 180 CONTINUE
870 END IF
871 CALL dgamx2d( ictxt, 'Column', ' ', 1, 1, lstres,
872 $ 1, idum, idum, 1, -1, mycol )
873 IF( lstres.NE.zero )
874 $ ferr( jjfbe ) = est / lstres
875*
876 jjxb = jjxb + 1
877 jjfbe = jjfbe + 1
878 ioffxb = ioffxb + ldxb
879*
880 END IF
881*
882 190 CONTINUE
883*
884 icurcol = mod( icurcol+1, npcol )
885*
886 200 CONTINUE
887*
888 work( 1 ) = dcmplx( dble( lwmin ) )
889 rwork( 1 ) = dble( lrwmin )
890*
891 RETURN
892*
893* End of PZGERFS
894*
895 END
subroutine chk1mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, info)
Definition chk1mat.f:3
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pchk2mat(ma, mapos0, na, napos0, ia, ja, desca, descapos0, mb, mbpos0, nb, nbpos0, ib, jb, descb, descbpos0, nextra, ex, expos, info)
Definition pchkxmat.f:175
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
subroutine pzgerfs(trans, n, nrhs, a, ia, ja, desca, af, iaf, jaf, descaf, ipiv, b, ib, jb, descb, x, ix, jx, descx, ferr, berr, work, lwork, rwork, lrwork, info)
Definition pzgerfs.f:5
subroutine pzgetrs(trans, n, nrhs, a, ia, ja, desca, ipiv, b, ib, jb, descb, info)
Definition pzgetrs.f:3
subroutine pzlacon(n, v, iv, jv, descv, x, ix, jx, descx, est, kase)
Definition pzlacon.f:3