ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
psqrt14.f
Go to the documentation of this file.
1  REAL FUNCTION PSQRT14( TRANS, M, N, NRHS, A, IA, JA,
2  $ DESCA, X, IX, JX, DESCX, WORK )
3 *
4 * -- ScaLAPACK routine (version 1.7) --
5 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6 * and University of California, Berkeley.
7 * May 1, 1997
8 *
9 * .. Scalar Arguments ..
10  CHARACTER trans
11  INTEGER ia, ix, ja, jx, m, n, nrhs
12 * ..
13 * .. Array Arguments ..
14  INTEGER desca( * ), descx( * )
15  REAL a( * ), work( * ), x( * )
16 * ..
17 *
18 * Purpose
19 * =======
20 *
21 * PSQRT14 checks whether sub( X ) is in the row space of sub( A ) or
22 * sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
23 * sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
24 * X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both
25 * sub( X ) and sub( A ) such that their norms are in the range
26 * [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
27 * [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
28 * [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
29 * triangle, scaled by MAX(M,N,NRHS)*eps.
30 *
31 * Notes
32 * =====
33 *
34 * Each global data object is described by an associated description
35 * vector. This vector stores the information required to establish
36 * the mapping between an object element and its corresponding process
37 * and memory location.
38 *
39 * Let A be a generic term for any 2D block cyclicly distributed array.
40 * Such a global array has an associated description vector DESCA.
41 * In the following comments, the character _ should be read as
42 * "of the global array".
43 *
44 * NOTATION STORED IN EXPLANATION
45 * --------------- -------------- --------------------------------------
46 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47 * DTYPE_A = 1.
48 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49 * the BLACS process grid A is distribu-
50 * ted over. The context itself is glo-
51 * bal, but the handle (the integer
52 * value) may vary.
53 * M_A (global) DESCA( M_ ) The number of rows in the global
54 * array A.
55 * N_A (global) DESCA( N_ ) The number of columns in the global
56 * array A.
57 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58 * the rows of the array.
59 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60 * the columns of the array.
61 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62 * row of the array A is distributed.
63 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64 * first column of the array A is
65 * distributed.
66 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67 * array. LLD_A >= MAX(1,LOCr(M_A)).
68 *
69 * Let K be the number of rows or columns of a distributed matrix,
70 * and assume that its process grid has dimension p x q.
71 * LOCr( K ) denotes the number of elements of K that a process
72 * would receive if K were distributed over the p processes of its
73 * process column.
74 * Similarly, LOCc( K ) denotes the number of elements of K that a
75 * process would receive if K were distributed over the q processes of
76 * its process row.
77 * The values of LOCr() and LOCc() may be determined via a call to the
78 * ScaLAPACK tool function, NUMROC:
79 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81 * An upper bound for these quantities may be computed by:
82 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84 *
85 * Arguments
86 * =========
87 *
88 * TRANS (global input) CHARACTER*1
89 * = 'N': No transpose, check for sub( X ) in the row space of
90 * sub( A ),
91 * = 'T': Transpose, check for sub( X ) in row space of
92 * sub( A )'.
93 *
94 * M (global input) INTEGER
95 * The number of rows to be operated on, i.e. the number of rows
96 * of the distributed submatrix sub( A ). M >= 0.
97 *
98 * N (global input) INTEGER
99 * The number of columns to be operated on, i.e. the number of
100 * columns of the distributed submatrix sub( A ). N >= 0.
101 *
102 * NRHS (global input) INTEGER
103 * The number of right hand sides, i.e., the number of columns
104 * of the distributed submatrix sub( X ). NRHS >= 0.
105 *
106 * A (local input) REAL pointer into the local memory
107 * to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
108 * contains the local pieces of the M-by-N distributed matrix
109 * sub( A ).
110 *
111 * IA (global input) INTEGER
112 * The row index in the global array A indicating the first
113 * row of sub( A ).
114 *
115 * JA (global input) INTEGER
116 * The column index in the global array A indicating the
117 * first column of sub( A ).
118 *
119 * DESCA (global and local input) INTEGER array of dimension DLEN_.
120 * The array descriptor for the distributed matrix A.
121 *
122 * X (local input) REAL pointer into the local
123 * memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
124 * On entry, this array contains the local pieces of the
125 * N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
126 * and the M-by-NRHS distributed submatrix sub( X ) otherwise.
127 *
128 * IX (global input) INTEGER
129 * The row index in the global array X indicating the first
130 * row of sub( X ).
131 *
132 * JX (global input) INTEGER
133 * The column index in the global array X indicating the
134 * first column of sub( X ).
135 *
136 * DESCX (global and local input) INTEGER array of dimension DLEN_.
137 * The array descriptor for the distributed matrix X.
138 *
139 * WORK (local workspace) REAL array dimension (LWORK)
140 * If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
141 * LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
142 *
143 * IF TRANS='N', (LQ fact)
144 * MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
145 * NPROW )
146 * LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
147 * RSRC_A, NPROW )
148 * LWF = MB_A * ( MB_A + MNRHSP + NQ0 )
149 * ELSE (QR fact)
150 * NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
151 * NPCOL )
152 * LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
153 * CSRC_A, NPCOL )
154 * LWF = NB_A * ( NB_A + MP0 + NNRHSQ )
155 * END IF
156 *
157 * and,
158 *
159 * IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
160 * IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
161 * IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
162 * MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
163 * NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
164 *
165 * INDXG2P and NUMROC are ScaLAPACK tool functions;
166 * MYROW, MYCOL, NPROW and NPCOL can be determined by calling
167 * the subroutine BLACS_GRIDINFO.
168 *
169 *
170 * =====================================================================
171 *
172 * .. Parameters ..
173  INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
174  $ lld_, mb_, m_, nb_, n_, rsrc_
175  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178  REAL one, zero
179  parameter( zero = 0.0e+0, one = 1.0e+0 )
180 * ..
181 * .. Local Scalars ..
182  LOGICAL tpsd
183  INTEGER iacol, iarow, icoffa, ictxt, idum, iia, info,
184  $ iptau, ipw, ipwa, iroffa, iwa, iwx, j, jja,
185  $ jwa, jwx, ldw, lwork, mpwa, mpw, mqw, mycol,
186  $ myrow, npcol, nprow, npw, nqwa, nqw
187  REAL amax, anrm, err, xnrm
188 * ..
189 * .. Local Arrays ..
190  INTEGER descw( dlen_ ), idum1( 1 ), idum2( 1 )
191  REAL rwork( 1 )
192 * ..
193 * .. External Functions ..
194  LOGICAL lsame
195  INTEGER numroc
196  REAL pslange, pslamch
197  EXTERNAL lsame, numroc, pslange, pslamch
198 * ..
199 * .. External Subroutines ..
200  EXTERNAL blacs_gridinfo, descset, infog2l, psamax,
201  $ pscopy, psgelqf, psgeqrf, pslacpy,
202  $ pslascl, pxerbla, sgamx2d
203 * ..
204 * .. Intrinsic Functions ..
205  INTRINSIC abs, max, min, mod, real
206 * ..
207 * .. Executable Statements ..
208 *
209 * Get grid parameters
210 *
211  ictxt = desca( ctxt_ )
212  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
213 *
214  psqrt14 = zero
215 *
216  ipwa = 1
217  iroffa = mod( ia-1, desca( mb_ ) )
218  icoffa = mod( ja-1, desca( nb_ ) )
219  iwa = iroffa + 1
220  jwa = icoffa + 1
221  CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
222  $ jja, iarow, iacol )
223  mpwa = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
224  nqwa = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
225 *
226  info = 0
227  IF( lsame( trans, 'N' ) ) THEN
228  IF( n.LE.0 .OR. nrhs.LE.0 )
229  $ RETURN
230  tpsd = .false.
231  mpw = numroc( m+nrhs+iroffa, desca( mb_ ), myrow, iarow,
232  $ nprow )
233  nqw = nqwa
234 *
235 * Assign descriptor DESCW for workspace WORK and pointers to
236 * matrices sub( A ) and sub( X ) in workspace
237 *
238  iwx = iwa + m
239  jwx = jwa
240  ldw = max( 1, mpw )
241  CALL descset( descw, m+nrhs+iroffa, n+icoffa, desca( mb_ ),
242  $ desca( nb_ ), iarow, iacol, ictxt, ldw )
243 *
244  ELSE IF( lsame( trans, 'T' ) ) THEN
245  IF( m.LE.0 .OR. nrhs.LE.0 )
246  $ RETURN
247  tpsd = .true.
248  mpw = mpwa
249  nqw = numroc( n+nrhs+icoffa, desca( nb_ ), mycol, iacol,
250  $ npcol )
251 *
252 * Assign descriptor DESCW for workspace WORK and pointers to
253 * matrices sub( A ) and sub( X ) in workspace
254 *
255  iwx = iwa
256  jwx = jwa + n
257  ldw = max( 1, mpw )
258  CALL descset( descw, m+iroffa, n+nrhs+icoffa, desca( mb_ ),
259  $ desca( nb_ ), iarow, iacol, ictxt, ldw )
260  ELSE
261  CALL pxerbla( ictxt, 'PSQRT14', -1 )
262  RETURN
263  END IF
264 *
265 * Copy and scale sub( A )
266 *
267  iptau = ipwa + mpw*nqw
268  CALL pslacpy( 'All', m, n, a, ia, ja, desca, work( ipwa ), iwa,
269  $ jwa, descw )
270  rwork( 1 ) = zero
271  anrm = pslange( 'M', m, n, work( ipwa ), iwa, jwa, descw, rwork )
272  IF( anrm.NE.zero )
273  $ CALL pslascl( 'G', anrm, one, m, n, work( ipwa ), iwa,
274  $ jwa, descw, info )
275 *
276 * Copy sub( X ) or sub( X )' into the right place and scale it
277 *
278  IF( tpsd ) THEN
279 *
280 * Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
281 *
282  DO 10 j = 1, nrhs
283  CALL pscopy( m, x, ix, jx+j-1, descx, 1, work( ipwa ), iwx,
284  $ jwx+j-1, descw, 1 )
285  10 CONTINUE
286  xnrm = pslange( 'M', m, nrhs, work( ipwa ), iwx, jwx, descw,
287  $ rwork )
288  IF( xnrm.NE.zero )
289  $ CALL pslascl( 'G', xnrm, one, m, nrhs, work( ipwa ), iwx,
290  $ jwx, descw, info )
291 *
292 * Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
293 *
294  mqw = numroc( m+icoffa, desca( nb_ ), mycol, iacol, npcol )
295  ipw = iptau + min( mqw, nqw )
296  lwork = descw( nb_ ) * ( mpw + nqw + descw( nb_ ) )
297  CALL psgeqrf( m, n+nrhs, work( ipwa ), iwa, jwa, descw,
298  $ work( iptau ), work( ipw ), lwork, info )
299 *
300 * Compute largest entry in upper triangle of
301 * work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
302 *
303  err = zero
304  IF( n.LT.m ) THEN
305  DO 20 j = jwx, jwa+n+nrhs-1
306  CALL psamax( min(m-n,j-jwx+1), amax, idum, work( ipwa ),
307  $ iwa+n, j, descw, 1 )
308  err = max( err, abs( amax ) )
309  20 CONTINUE
310  END IF
311  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, idum1, idum2,
312  $ -1, -1, 0 )
313 *
314  ELSE
315 *
316 * Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
317 *
318  DO 30 j = 1, nrhs
319  CALL pscopy( n, x, ix, jx+j-1, descx, 1, work( ipwa ),
320  $ iwx+j-1, jwx, descw, descw( m_ ) )
321  30 CONTINUE
322 *
323  xnrm = pslange( 'M', nrhs, n, work( ipwa ), iwx, jwx, descw,
324  $ rwork )
325  IF( xnrm.NE.zero )
326  $ CALL pslascl( 'G', xnrm, one, nrhs, n, work( ipwa ), iwx,
327  $ jwx, descw, info )
328 *
329 * Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
330 *
331  npw = numroc( n+iroffa, desca( mb_ ), myrow, iarow, nprow )
332  ipw = iptau + min( mpw, npw )
333  lwork = descw( mb_ ) * ( mpw + nqw + descw( mb_ ) )
334  CALL psgelqf( m+nrhs, n, work( ipwa ), iwa, jwa, descw,
335  $ work( iptau ), work( ipw ), lwork, info )
336 *
337 * Compute largest entry in lower triangle in
338 * work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
339 *
340  err = zero
341  DO 40 j = jwa+m, min( jwa+n-1, jwa+m+nrhs-1 )
342  CALL psamax( jwa+m+nrhs-j, amax, idum, work( ipwa ),
343  $ iwx+j-jwa-m, j, descw, 1 )
344  err = max( err, abs( amax ) )
345  40 CONTINUE
346  CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, idum1, idum2,
347  $ -1, -1, 0 )
348 *
349  END IF
350 *
351  psqrt14 = err / ( real( max( m, n, nrhs ) ) *
352  $ pslamch( ictxt, 'Epsilon' ) )
353 *
354  RETURN
355 *
356 * End of PSQRT14
357 *
358  END
pslamch
real function pslamch(ICTXT, CMACH)
Definition: pcblastst.f:7455
max
#define max(A, B)
Definition: pcgemr.c:180
pslascl
subroutine pslascl(TYPE, CFROM, CTO, M, N, A, IA, JA, DESCA, INFO)
Definition: pslascl.f:3
pslange
real function pslange(NORM, M, N, A, IA, JA, DESCA, WORK)
Definition: pslange.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
lsame
logical function lsame(CA, CB)
Definition: tools.f:1724
psqrt14
real function psqrt14(TRANS, M, N, NRHS, A, IA, JA, DESCA, X, IX, JX, DESCX, WORK)
Definition: psqrt14.f:3
pslacpy
subroutine pslacpy(UPLO, M, N, A, IA, JA, DESCA, B, IB, JB, DESCB)
Definition: pslacpy.f:3
psgelqf
subroutine psgelqf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgelqf.f:3
descset
subroutine descset(DESC, M, N, MB, NB, IRSRC, ICSRC, ICTXT, LLD)
Definition: descset.f:3
numroc
integer function numroc(N, NB, IPROC, ISRCPROC, NPROCS)
Definition: numroc.f:2
psgeqrf
subroutine psgeqrf(M, N, A, IA, JA, DESCA, TAU, WORK, LWORK, INFO)
Definition: psgeqrf.f:3
pxerbla
subroutine pxerbla(ICTXT, SRNAME, INFO)
Definition: pxerbla.f:2
min
#define min(A, B)
Definition: pcgemr.c:181