ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psqrt17.f
Go to the documentation of this file.
1  REAL FUNCTION PSQRT17( TRANS, IRESID, M, N, NRHS, A,
2  $ IA, JA, DESCA, X, IX, JX,
3  $ DESCX, B, IB, JB, DESCB, WORK,
4  $ RWORK )
5 *
6 * -- ScaLAPACK routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * May 1, 1997
10 *
11 * .. Scalar Arguments ..
12  CHARACTER trans
13  INTEGER ia, ib, iresid, ix, ja, jb, jx, m, n, nrhs
14 * ..
15 * .. Array Arguments ..
16  INTEGER desca( * ), descb( * ), descx( * )
17  REAL a( * ), b( * ), work( * ), x( * )
18  REAL rwork( * )
19 * ..
20 *
21 * Purpose
22 * =======
23 *
24 * PSQRT17 computes the ratio
25 *
26 * || R'*op( sub( A ) ) ||/(||sub( A )||*alpha*max(M,N,NRHS)*eps)
27 *
28 * where R = op( sub( A ) )*sub( X ) - B, op(A) is A or A', and
29 *
30 * alpha = ||B|| if IRESID = 1 (zero-residual problem)
31 * alpha = ||R|| if IRESID = 2 (otherwise).
32 *
33 * Notes
34 * =====
35 *
36 * Each global data object is described by an associated description
37 * vector. This vector stores the information required to establish
38 * the mapping between an object element and its corresponding process
39 * and memory location.
40 *
41 * Let A be a generic term for any 2D block cyclicly distributed array.
42 * Such a global array has an associated description vector DESCA.
43 * In the following comments, the character _ should be read as
44 * "of the global array".
45 *
46 * NOTATION STORED IN EXPLANATION
47 * --------------- -------------- --------------------------------------
48 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
49 * DTYPE_A = 1.
50 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
51 * the BLACS process grid A is distribu-
52 * ted over. The context itself is glo-
53 * bal, but the handle (the integer
54 * value) may vary.
55 * M_A (global) DESCA( M_ ) The number of rows in the global
56 * array A.
57 * N_A (global) DESCA( N_ ) The number of columns in the global
58 * array A.
59 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
60 * the rows of the array.
61 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
62 * the columns of the array.
63 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
64 * row of the array A is distributed.
65 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
66 * first column of the array A is
67 * distributed.
68 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
69 * array. LLD_A >= MAX(1,LOCr(M_A)).
70 *
71 * Let K be the number of rows or columns of a distributed matrix,
72 * and assume that its process grid has dimension p x q.
73 * LOCr( K ) denotes the number of elements of K that a process
74 * would receive if K were distributed over the p processes of its
75 * process column.
76 * Similarly, LOCc( K ) denotes the number of elements of K that a
77 * process would receive if K were distributed over the q processes of
78 * its process row.
79 * The values of LOCr() and LOCc() may be determined via a call to the
80 * ScaLAPACK tool function, NUMROC:
81 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
82 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
83 * An upper bound for these quantities may be computed by:
84 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
85 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
86 *
87 * Arguments
88 * =========
89 *
90 * TRANS (global input) CHARACTER*1
91 * Specifies whether or not the transpose of sub( A ) is used.
92 * = 'N': No transpose, op( sub( A ) ) = sub( A ).
93 * = 'T': Transpose, op( sub( A ) ) = sub( A' ).
94 *
95 * IRESID (global input) INTEGER
96 * IRESID = 1 indicates zero-residual problem.
97 * IRESID = 2 indicates non-zero residual.
98 *
99 * M (global input) INTEGER
100 * The number of rows to be operated on, i.e. the number of rows
101 * of the distributed submatrix sub( A ). M >= 0.
102 * If TRANS = 'N', the number of rows of the distributed
103 * submatrix sub( B ).
104 * If TRANS = 'T', the number of rows of the distributed
105 * submatrix sub( X ).
106 *
107 * N (global input) INTEGER
108 * The number of columns to be operated on, i.e. the number of
109 * columns of the distributed submatrix sub( A ). N >= 0.
110 * If TRANS = 'N', the number of rows of the distributed
111 * submatrix sub( X ). Otherwise N is the number of rows of
112 * the distributed submatrix sub( B ).
113 *
114 * NRHS (global input) INTEGER
115 * The number of right hand sides, i.e., the number of columns
116 * of the distributed submatrices sub( X ) and sub( B ).
117 * NRHS >= 0.
118 *
119 * A (local input) REAL pointer into the local memory
120 * to an array of dimension (LLD_A,LOCc(JA+N-1)). This array
121 * contains the local pieces of the distributed M-by-N
122 * submatrix sub( A ).
123 *
124 * IA (global input) INTEGER
125 * The row index in the global array A indicating the first
126 * row of sub( A ).
127 *
128 * JA (global input) INTEGER
129 * The column index in the global array A indicating the
130 * first column of sub( A ).
131 *
132 * DESCA (global and local input) INTEGER array of dimension DLEN_.
133 * The array descriptor for the distributed matrix A.
134 *
135 * X (local input) REAL pointer into the local
136 * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
137 * If TRANS = 'N', this array contains the local pieces of the
138 * N-by-NRHS distributed submatrix sub( X ). Otherwise, this
139 * array contains the local pieces of the M-by-NRHS distributed
140 * submatrix sub( X ).
141 *
142 * IX (global input) INTEGER
143 * The row index in the global array X indicating the first
144 * row of sub( X ).
145 *
146 * JX (global input) INTEGER
147 * The column index in the global array X indicating the
148 * first column of sub( X ).
149 *
150 * DESCX (global and local input) INTEGER array of dimension DLEN_.
151 * The array descriptor for the distributed matrix X.
152 *
153 * B (local input) REAL pointer into the local memory
154 * to an array of dimension (LLD_B,LOCc(JB+NRHS-1)).
155 * If TRANS='N', this array contains the local pieces of the
156 * distributed M-by-NRHS submatrix operand sub( B ). Otherwise,
157 * this array contains the local pieces of the distributed
158 * N-by-NRHS submatrix operand sub( B ).
159 *
160 * IB (global input) INTEGER
161 * The row index in the global array B indicating the first
162 * row of sub( B ).
163 *
164 * JB (global input) INTEGER
165 * The column index in the global array B indicating the
166 * first column of sub( B ).
167 *
168 * DESCB (global and local input) INTEGER array of dimension DLEN_.
169 * The array descriptor for the distributed matrix B.
170 *
171 * WORK (local workspace) REAL array, dimension (LWORK)
172 * If TRANS = 'N', LWORK >= Mp0 * NRHSq0 + NRHSp0 * Nq0
173 * otherwise LWORK >= Np0 * NRHSq0 + NRHSp0 * Mq0
174 *
175 * RWORK (local workspace) REAL array, dimension (LRWORK)
176 * LRWORK >= Nq0, if TRANS = 'N', and LRWORK >= Mp0 otherwise.
177 *
178 * where
179 *
180 * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
181 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
182 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
183 * Mp0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
184 * Np0 = NUMROC( N+ICOFFA, NB_A, MYROW, IAROW, NPROW ),
185 * Mq0 = NUMROC( M+IROFFA, NB_A, MYCOL, IACOL, NPCOL ),
186 * Nq0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ),
187 *
188 * IROFFB = MOD( IB-1, MB_B ), ICOFFB = MOD( JB-1, NB_B ),
189 * IBROW = INDXG2P( IB, MB_B, MYROW, RSRC_B, NPROW ),
190 * IBCOL = INDXG2P( JB, NB_B, MYCOL, CSRC_B, NPCOL ),
191 * NRHSp0 = NUMROC( NRHS+ICOFFB, NB_B, MYROW, IBROW, NPROW ),
192 * NRHSq0 = NUMROC( NRHS+ICOFFB, NB_B, MYCOL, IBCOL, NPCOL ).
193 *
194 * INDXG2P and NUMROC are ScaLAPACK tool functions; MYROW,
195 * MYCOL, NPROW and NPCOL can be determined by calling the
196 * subroutine BLACS_GRIDINFO.
197 *
198 * =====================================================================
199 *
200 * .. Parameters ..
201  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
202  $ lld_, mb_, m_, nb_, n_, rsrc_
203  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
204  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
205  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
206  REAL zero, one
207  parameter( zero = 0.0e0, one = 1.0e0 )
208 * ..
209 * .. Local Scalars ..
210  INTEGER iacol, ibcol, ibrow, icoffb, ictxt, info,
211  $ ioffa, iroffb, iscl, iw, iw2, jw, jw2, mycol,
212  $ nrhsq, nrhsp, myrow, ncols, npcol, nprow,
213  $ nrows, nrowsp
214  REAL err, norma, normb, normrs, normx, smlnum
215 * ..
216 * .. Local Arrays ..
217  INTEGER descw( dlen_ ), descw2( dlen_ )
218 * ..
219 * .. External Functions ..
220  LOGICAL lsame
221  INTEGER indxg2p, numroc
222  REAL pslange, pslamch
223  EXTERNAL indxg2p, lsame, numroc, pslange, pslamch
224 * ..
225 * .. External Subroutines ..
226  EXTERNAL blacs_gridinfo, descset, psgemm, pslacpy,
227  $ pslascl, pxerbla
228 * ..
229 * .. Intrinsic Functions ..
230  INTRINSIC max, real
231 * ..
232 * .. Executable Statements ..
233 *
234  psqrt17 = zero
235 *
236 * Get grid parameters
237 *
238  ictxt = desca( ctxt_ )
239  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
240 *
241  info = 0
242  IF( lsame( trans, 'N' ) ) THEN
243  nrows = m
244  ncols = n
245  ioffa = mod( ja-1, desca( nb_ ) )
246  ELSE IF( lsame( trans, 'T' ) ) THEN
247  nrows = n
248  ncols = m
249  ioffa = mod( ia-1, desca( mb_ ) )
250  ELSE
251  CALL pxerbla( ictxt, 'PSQRT17', -1 )
252  RETURN
253  END IF
254 *
255  IF( m.LE.0 .OR. n.LE.0 .OR. nrhs.LE.0 )
256  $ RETURN
257 *
258  iroffb = mod( ia-1, desca( mb_ ) )
259  icoffb = mod( ja-1, desca( nb_ ) )
260  ibrow = indxg2p( ib, descb( mb_ ), myrow, descb( rsrc_ ),
261  $ nprow )
262  iacol = indxg2p( ja, desca( nb_ ), mycol, desca( csrc_ ),
263  $ npcol )
264  ibcol = indxg2p( jb, descb( nb_ ), mycol, descb( csrc_ ),
265  $ npcol )
266 *
267  nrhsq = numroc( nrhs+icoffb, descb( nb_ ), mycol, ibcol, npcol )
268  nrhsp = numroc( nrhs+iroffb, descb( nb_ ), myrow, ibrow, nprow )
269  nrowsp = numroc( nrows+iroffb, desca( mb_ ), myrow, ibrow, nprow )
270 *
271 * Assign array descriptor DESCW for workspace WORK, where DESCW
272 * holds a copy of the distributed submatrix sub( B )
273 *
274  CALL descset( descw, nrows+iroffb, nrhs+icoffb, descb( mb_ ),
275  $ descb( nb_ ), ibrow, ibcol, ictxt, max( 1,
276  $ nrowsp ) )
277 *
278 * Assign array descriptor DESCW2 for workspace WORK, where DESCW2
279 * holds a copy of the distributed submatrix sub( X**T )
280 *
281  CALL descset( descw2, nrhs+icoffb, ncols+ioffa, descx( nb_ ),
282  $ descx( mb_ ), ibrow, iacol, ictxt, max( 1,
283  $ nrhsp ) )
284 *
285  norma = pslange( 'One-norm', m, n, a, ia, ja, desca, rwork )
286  smlnum = pslamch( ictxt, 'Safe minimum' )
287  smlnum = smlnum / pslamch( ictxt, 'Precision' )
288  iscl = 0
289 *
290 * compute residual and scale it
291 *
292  iw = 1 + iroffb
293  jw = 1 + icoffb
294  CALL pslacpy( 'All', nrows, nrhs, b, ib, jb, descb, work, iw, jw,
295  $ descw )
296  CALL psgemm( trans, 'No transpose', nrows, nrhs, ncols, -one, a,
297  $ ia, ja, desca, x, ix, jx, descx, one, work, iw, jw,
298  $ descw )
299  normrs = pslange( 'Max', nrows, nrhs, work, iw, jw, descw,
300  $ rwork )
301  IF( normrs.GT.smlnum ) THEN
302  iscl = 1
303  CALL pslascl( 'General', normrs, one, nrows, nrhs, work,
304  $ iw, jw, descw, info )
305  END IF
306 *
307 * compute R'*sub( A )
308 *
309  iw2 = 1 + icoffb
310  jw2 = 1 + ioffa
311  CALL psgemm( 'Transpose', trans, nrhs, ncols, nrows, one, work,
312  $ iw, jw, descw, a, ia, ja, desca, zero,
313  $ work( nrowsp*nrhsq+1 ), iw2, jw2, descw2 )
314 *
315 * compute and properly scale error
316 *
317  err = pslange( 'One-norm', nrhs, ncols, work( nrowsp*nrhsq+1 ),
318  $ iw2, jw2, descw2, rwork )
319  IF( norma.NE.zero )
320  $ err = err / norma
321 *
322  IF( iscl.EQ.1 )
323  $ err = err*normrs
324 *
325  IF( iresid.EQ.1 ) THEN
326  normb = pslange( 'One-norm', nrows, nrhs, b, ib, jb, descb,
327  $ rwork )
328  IF( normb.NE.zero )
329  $ err = err / normb
330  ELSE
331  normx = pslange( 'One-norm', ncols, nrhs, x, ix, jx, descx,
332  $ rwork )
333  IF( normx.NE.zero )
334  $ err = err / normx
335  END IF
336 *
337  psqrt17 = err / ( pslamch( ictxt, 'Epsilon' ) *
338  $ real( max( m, n, nrhs ) ) )
339 *
340  RETURN
341 *
342 * End of PSQRT17
343 *
344  END
pslamch
real function pslamch(ICTXT, CMACH)
Definition: pcblastst.f:7455
indxg2p
integer function indxg2p(INDXGLOB, NB, IPROC, ISRCPROC, NPROCS)
Definition: indxg2p.f:2
max
#define max(A, B)
Definition: pcgemr.c:180
pslascl
subroutine pslascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pslascl.f:3
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
pslacpy
subroutine pslacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pslacpy.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
psqrt17
real function psqrt17(TRANS, IRESID, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, B, IB, JB, DESCB, WORK, RWORK)
Definition: psqrt17.f:5
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2