SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
psqrt14.f
Go to the documentation of this file.
1 REAL function psqrt14( trans, m, n, nrhs, a, ia, ja,
2 $ desca, x, ix, jx, descx, work )
3*
4* -- ScaLAPACK routine (version 1.7) --
5* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
6* and University of California, Berkeley.
7* May 1, 1997
8*
9* .. Scalar Arguments ..
10 CHARACTER trans
11 INTEGER ia, ix, ja, jx, m, n, nrhs
12* ..
13* .. Array Arguments ..
14 INTEGER desca( * ), descx( * )
15 REAL a( * ), work( * ), x( * )
16* ..
17*
18* Purpose
19* =======
20*
21* PSQRT14 checks whether sub( X ) is in the row space of sub( A ) or
22* sub( A )', where sub( A ) denotes A( IA:IA+M-1, JA:JA+N-1 ) and
23* sub( X ) denotes X( IX:IX+N-1, JX:JX+NRHS-1 ) if TRANS = 'N', and
24* X( IX:IX+N-1, JX:JX+NRHS-1 ) otherwise. It does so by scaling both
25* sub( X ) and sub( A ) such that their norms are in the range
26* [sqrt(eps), 1/sqrt(eps)], then computing an LQ factorization of
27* [sub( A )',sub( X )]' (if TRANS = 'N') or a QR factorization of
28* [sub( A ),sub( X )] otherwise, and returning the norm of the trailing
29* triangle, scaled by MAX(M,N,NRHS)*eps.
30*
31* Notes
32* =====
33*
34* Each global data object is described by an associated description
35* vector. This vector stores the information required to establish
36* the mapping between an object element and its corresponding process
37* and memory location.
38*
39* Let A be a generic term for any 2D block cyclicly distributed array.
40* Such a global array has an associated description vector DESCA.
41* In the following comments, the character _ should be read as
42* "of the global array".
43*
44* NOTATION STORED IN EXPLANATION
45* --------------- -------------- --------------------------------------
46* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
47* DTYPE_A = 1.
48* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
49* the BLACS process grid A is distribu-
50* ted over. The context itself is glo-
51* bal, but the handle (the integer
52* value) may vary.
53* M_A (global) DESCA( M_ ) The number of rows in the global
54* array A.
55* N_A (global) DESCA( N_ ) The number of columns in the global
56* array A.
57* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
58* the rows of the array.
59* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
60* the columns of the array.
61* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
62* row of the array A is distributed.
63* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
64* first column of the array A is
65* distributed.
66* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
67* array. LLD_A >= MAX(1,LOCr(M_A)).
68*
69* Let K be the number of rows or columns of a distributed matrix,
70* and assume that its process grid has dimension p x q.
71* LOCr( K ) denotes the number of elements of K that a process
72* would receive if K were distributed over the p processes of its
73* process column.
74* Similarly, LOCc( K ) denotes the number of elements of K that a
75* process would receive if K were distributed over the q processes of
76* its process row.
77* The values of LOCr() and LOCc() may be determined via a call to the
78* ScaLAPACK tool function, NUMROC:
79* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
80* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
81* An upper bound for these quantities may be computed by:
82* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
83* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
84*
85* Arguments
86* =========
87*
88* TRANS (global input) CHARACTER*1
89* = 'N': No transpose, check for sub( X ) in the row space of
90* sub( A ),
91* = 'T': Transpose, check for sub( X ) in row space of
92* sub( A )'.
93*
94* M (global input) INTEGER
95* The number of rows to be operated on, i.e. the number of rows
96* of the distributed submatrix sub( A ). M >= 0.
97*
98* N (global input) INTEGER
99* The number of columns to be operated on, i.e. the number of
100* columns of the distributed submatrix sub( A ). N >= 0.
101*
102* NRHS (global input) INTEGER
103* The number of right hand sides, i.e., the number of columns
104* of the distributed submatrix sub( X ). NRHS >= 0.
105*
106* A (local input) REAL pointer into the local memory
107* to an array of dimension (LLD_A, LOCc(JA+N-1)). This array
108* contains the local pieces of the M-by-N distributed matrix
109* sub( A ).
110*
111* IA (global input) INTEGER
112* The row index in the global array A indicating the first
113* row of sub( A ).
114*
115* JA (global input) INTEGER
116* The column index in the global array A indicating the
117* first column of sub( A ).
118*
119* DESCA (global and local input) INTEGER array of dimension DLEN_.
120* The array descriptor for the distributed matrix A.
121*
122* X (local input) REAL pointer into the local
123* memory to an array of dimension (LLD_X,LOCc(JX+NRHS-1)).
124* On entry, this array contains the local pieces of the
125* N-by-NRHS distributed submatrix sub( X ) if TRANS = 'N',
126* and the M-by-NRHS distributed submatrix sub( X ) otherwise.
127*
128* IX (global input) INTEGER
129* The row index in the global array X indicating the first
130* row of sub( X ).
131*
132* JX (global input) INTEGER
133* The column index in the global array X indicating the
134* first column of sub( X ).
135*
136* DESCX (global and local input) INTEGER array of dimension DLEN_.
137* The array descriptor for the distributed matrix X.
138*
139* WORK (local workspace) REAL array dimension (LWORK)
140* If TRANS='N', LWORK >= MNRHSP * NQ + LTAU + LWF and
141* LWORK >= MP * NNRHSQ + LTAU + LWF otherwise, where
142*
143* IF TRANS='N', (LQ fact)
144* MNRHSP = NUMROC( M+NRHS+IROFFA, MB_A, MYROW, IAROW,
145* NPROW )
146* LTAU = NUMROC( IA+MIN( M+NRHS, N )-1, MB_A, MYROW,
147* RSRC_A, NPROW )
148* LWF = MB_A * ( MB_A + MNRHSP + NQ0 )
149* ELSE (QR fact)
150* NNRHSQ = NUMROC( N+NRHS+ICOFFA, NB_A, MYCOL, IACOL,
151* NPCOL )
152* LTAU = NUMROC( JA+MIN( M, N+NRHS )-1, NB_A, MYCOL,
153* CSRC_A, NPCOL )
154* LWF = NB_A * ( NB_A + MP0 + NNRHSQ )
155* END IF
156*
157* and,
158*
159* IROFFA = MOD( IA-1, MB_A ), ICOFFA = MOD( JA-1, NB_A ),
160* IAROW = INDXG2P( IA, MB_A, MYROW, RSRC_A, NPROW ),
161* IACOL = INDXG2P( JA, NB_A, MYCOL, CSRC_A, NPCOL ),
162* MP0 = NUMROC( M+IROFFA, MB_A, MYROW, IAROW, NPROW ),
163* NQ0 = NUMROC( N+ICOFFA, NB_A, MYCOL, IACOL, NPCOL ).
164*
165* INDXG2P and NUMROC are ScaLAPACK tool functions;
166* MYROW, MYCOL, NPROW and NPCOL can be determined by calling
167* the subroutine BLACS_GRIDINFO.
168*
169*
170* =====================================================================
171*
172* .. Parameters ..
173 INTEGER block_cyclic_2d, csrc_, ctxt_, dlen_, dtype_,
174 $ lld_, mb_, m_, nb_, n_, rsrc_
175 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
176 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
177 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
178 REAL one, zero
179 parameter( zero = 0.0e+0, one = 1.0e+0 )
180* ..
181* .. Local Scalars ..
182 LOGICAL tpsd
183 INTEGER iacol, iarow, icoffa, ictxt, idum, iia, info,
184 $ iptau, ipw, ipwa, iroffa, iwa, iwx, j, jja,
185 $ jwa, jwx, ldw, lwork, mpwa, mpw, mqw, mycol,
186 $ myrow, npcol, nprow, npw, nqwa, nqw
187 REAL amax, anrm, err, xnrm
188* ..
189* .. Local Arrays ..
190 INTEGER descw( dlen_ ), idum1( 1 ), idum2( 1 )
191 REAL rwork( 1 )
192* ..
193* .. External Functions ..
194 LOGICAL lsame
195 INTEGER numroc
196 REAL pslange, pslamch
197 EXTERNAL lsame, numroc, pslange, pslamch
198* ..
199* .. External Subroutines ..
200 EXTERNAL blacs_gridinfo, descset, infog2l, psamax,
201 $ pscopy, psgelqf, psgeqrf, pslacpy,
202 $ pslascl, pxerbla, sgamx2d
203* ..
204* .. Intrinsic Functions ..
205 INTRINSIC abs, max, min, mod, real
206* ..
207* .. Executable Statements ..
208*
209* Get grid parameters
210*
211 ictxt = desca( ctxt_ )
212 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
213*
214 psqrt14 = zero
215*
216 ipwa = 1
217 iroffa = mod( ia-1, desca( mb_ ) )
218 icoffa = mod( ja-1, desca( nb_ ) )
219 iwa = iroffa + 1
220 jwa = icoffa + 1
221 CALL infog2l( ia, ja, desca, nprow, npcol, myrow, mycol, iia,
222 $ jja, iarow, iacol )
223 mpwa = numroc( m+iroffa, desca( mb_ ), myrow, iarow, nprow )
224 nqwa = numroc( n+icoffa, desca( nb_ ), mycol, iacol, npcol )
225*
226 info = 0
227 IF( lsame( trans, 'N' ) ) THEN
228 IF( n.LE.0 .OR. nrhs.LE.0 )
229 $ RETURN
230 tpsd = .false.
231 mpw = numroc( m+nrhs+iroffa, desca( mb_ ), myrow, iarow,
232 $ nprow )
233 nqw = nqwa
234*
235* Assign descriptor DESCW for workspace WORK and pointers to
236* matrices sub( A ) and sub( X ) in workspace
237*
238 iwx = iwa + m
239 jwx = jwa
240 ldw = max( 1, mpw )
241 CALL descset( descw, m+nrhs+iroffa, n+icoffa, desca( mb_ ),
242 $ desca( nb_ ), iarow, iacol, ictxt, ldw )
243*
244 ELSE IF( lsame( trans, 'T' ) ) THEN
245 IF( m.LE.0 .OR. nrhs.LE.0 )
246 $ RETURN
247 tpsd = .true.
248 mpw = mpwa
249 nqw = numroc( n+nrhs+icoffa, desca( nb_ ), mycol, iacol,
250 $ npcol )
251*
252* Assign descriptor DESCW for workspace WORK and pointers to
253* matrices sub( A ) and sub( X ) in workspace
254*
255 iwx = iwa
256 jwx = jwa + n
257 ldw = max( 1, mpw )
258 CALL descset( descw, m+iroffa, n+nrhs+icoffa, desca( mb_ ),
259 $ desca( nb_ ), iarow, iacol, ictxt, ldw )
260 ELSE
261 CALL pxerbla( ictxt, 'PSQRT14', -1 )
262 RETURN
263 END IF
264*
265* Copy and scale sub( A )
266*
267 iptau = ipwa + mpw*nqw
268 CALL pslacpy( 'All', m, n, a, ia, ja, desca, work( ipwa ), iwa,
269 $ jwa, descw )
270 rwork( 1 ) = zero
271 anrm = pslange( 'M', m, n, work( ipwa ), iwa, jwa, descw, rwork )
272 IF( anrm.NE.zero )
273 $ CALL pslascl( 'G', anrm, one, m, n, work( ipwa ), iwa,
274 $ jwa, descw, info )
275*
276* Copy sub( X ) or sub( X )' into the right place and scale it
277*
278 IF( tpsd ) THEN
279*
280* Copy sub( X ) into columns jwa+n:jwa+n+nrhs-1 of work
281*
282 DO 10 j = 1, nrhs
283 CALL pscopy( m, x, ix, jx+j-1, descx, 1, work( ipwa ), iwx,
284 $ jwx+j-1, descw, 1 )
285 10 CONTINUE
286 xnrm = pslange( 'M', m, nrhs, work( ipwa ), iwx, jwx, descw,
287 $ rwork )
288 IF( xnrm.NE.zero )
289 $ CALL pslascl( 'G', xnrm, one, m, nrhs, work( ipwa ), iwx,
290 $ jwx, descw, info )
291*
292* Compute QR factorization of work(iwa:iwa+m-1,jwa:jwa+n+nrhs-1)
293*
294 mqw = numroc( m+icoffa, desca( nb_ ), mycol, iacol, npcol )
295 ipw = iptau + min( mqw, nqw )
296 lwork = descw( nb_ ) * ( mpw + nqw + descw( nb_ ) )
297 CALL psgeqrf( m, n+nrhs, work( ipwa ), iwa, jwa, descw,
298 $ work( iptau ), work( ipw ), lwork, info )
299*
300* Compute largest entry in upper triangle of
301* work(iwa+n:iwa+m-1,jwa+n:jwa+n+nrhs-1)
302*
303 err = zero
304 IF( n.LT.m ) THEN
305 DO 20 j = jwx, jwa+n+nrhs-1
306 CALL psamax( min(m-n,j-jwx+1), amax, idum, work( ipwa ),
307 $ iwa+n, j, descw, 1 )
308 err = max( err, abs( amax ) )
309 20 CONTINUE
310 END IF
311 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, idum1, idum2,
312 $ -1, -1, 0 )
313*
314 ELSE
315*
316* Copy sub( X )' into rows iwa+m:iwa+m+nrhs-1 of work
317*
318 DO 30 j = 1, nrhs
319 CALL pscopy( n, x, ix, jx+j-1, descx, 1, work( ipwa ),
320 $ iwx+j-1, jwx, descw, descw( m_ ) )
321 30 CONTINUE
322*
323 xnrm = pslange( 'M', nrhs, n, work( ipwa ), iwx, jwx, descw,
324 $ rwork )
325 IF( xnrm.NE.zero )
326 $ CALL pslascl( 'G', xnrm, one, nrhs, n, work( ipwa ), iwx,
327 $ jwx, descw, info )
328*
329* Compute LQ factorization of work(iwa:iwa+m+nrhs-1,jwa:jwa+n-1)
330*
331 npw = numroc( n+iroffa, desca( mb_ ), myrow, iarow, nprow )
332 ipw = iptau + min( mpw, npw )
333 lwork = descw( mb_ ) * ( mpw + nqw + descw( mb_ ) )
334 CALL psgelqf( m+nrhs, n, work( ipwa ), iwa, jwa, descw,
335 $ work( iptau ), work( ipw ), lwork, info )
336*
337* Compute largest entry in lower triangle in
338* work(iwa+m:iwa+m+nrhs-1,jwa+m:jwa+n-1)
339*
340 err = zero
341 DO 40 j = jwa+m, min( jwa+n-1, jwa+m+nrhs-1 )
342 CALL psamax( jwa+m+nrhs-j, amax, idum, work( ipwa ),
343 $ iwx+j-jwa-m, j, descw, 1 )
344 err = max( err, abs( amax ) )
345 40 CONTINUE
346 CALL sgamx2d( ictxt, 'All', ' ', 1, 1, err, 1, idum1, idum2,
347 $ -1, -1, 0 )
348*
349 END IF
350*
351 psqrt14 = err / ( real( max( m, n, nrhs ) ) *
352 $ pslamch( ictxt, 'Epsilon' ) )
353*
354 RETURN
355*
356* End of PSQRT14
357*
358 END
subroutine descset(desc, m, n, mb, nb, irsrc, icsrc, ictxt, lld)
Definition descset.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
integer function numroc(n, nb, iproc, isrcproc, nprocs)
Definition numroc.f:2
real function pslamch(ictxt, cmach)
Definition pcblastst.f:7455
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine psgelqf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgelqf.f:3
subroutine psgeqrf(m, n, a, ia, ja, desca, tau, work, lwork, info)
Definition psgeqrf.f:3
subroutine pslacpy(uplo, m, n, a, ia, ja, desca, b, ib, jb, descb)
Definition pslacpy.f:3
real function pslange(norm, m, n, a, ia, ja, desca, work)
Definition pslange.f:3
subroutine pslascl(type, cfrom, cto, m, n, a, ia, ja, desca, info)
Definition pslascl.f:3
real function psqrt14(trans, m, n, nrhs, a, ia, ja, desca, x, ix, jx, descx, work)
Definition psqrt14.f:3
subroutine pxerbla(ictxt, srname, info)
Definition pxerbla.f:2
logical function lsame(ca, cb)
Definition tools.f:1724