SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
pslasmsub.f
Go to the documentation of this file.
1 SUBROUTINE pslasmsub( A, DESCA, I, L, K, SMLNUM, BUF, LWORK )
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* May 1, 1997
7*
8* .. Scalar Arguments ..
9 INTEGER I, K, L, LWORK
10 REAL SMLNUM
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 REAL A( * ), BUF( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PSLASMSUB looks for a small subdiagonal element from the bottom
21* of the matrix that it can safely set to zero.
22*
23* Notes
24* =====
25*
26* Each global data object is described by an associated description
27* vector. This vector stores the information required to establish
28* the mapping between an object element and its corresponding process
29* and memory location.
30*
31* Let A be a generic term for any 2D block cyclicly distributed array.
32* Such a global array has an associated description vector DESCA.
33* In the following comments, the character _ should be read as
34* "of the global array".
35*
36* NOTATION STORED IN EXPLANATION
37* --------------- -------------- --------------------------------------
38* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
39* DTYPE_A = 1.
40* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41* the BLACS process grid A is distribu-
42* ted over. The context itself is glo-
43* bal, but the handle (the integer
44* value) may vary.
45* M_A (global) DESCA( M_ ) The number of rows in the global
46* array A.
47* N_A (global) DESCA( N_ ) The number of columns in the global
48* array A.
49* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
50* the rows of the array.
51* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
52* the columns of the array.
53* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54* row of the array A is distributed.
55* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56* first column of the array A is
57* distributed.
58* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
59* array. LLD_A >= MAX(1,LOCr(M_A)).
60*
61* Let K be the number of rows or columns of a distributed matrix,
62* and assume that its process grid has dimension p x q.
63* LOCr( K ) denotes the number of elements of K that a process
64* would receive if K were distributed over the p processes of its
65* process column.
66* Similarly, LOCc( K ) denotes the number of elements of K that a
67* process would receive if K were distributed over the q processes of
68* its process row.
69* The values of LOCr() and LOCc() may be determined via a call to the
70* ScaLAPACK tool function, NUMROC:
71* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73* An upper bound for these quantities may be computed by:
74* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76*
77* Arguments
78* =========
79*
80* A (global input) REAL array, dimension
81* (DESCA(LLD_),*)
82* On entry, the Hessenberg matrix whose tridiagonal part is
83* being scanned.
84* Unchanged on exit.
85*
86* DESCA (global and local input) INTEGER array of dimension DLEN_.
87* The array descriptor for the distributed matrix A.
88*
89* I (global input) INTEGER
90* The global location of the bottom of the unreduced
91* submatrix of A.
92* Unchanged on exit.
93*
94* L (global input) INTEGER
95* The global location of the top of the unreduced submatrix
96* of A.
97* Unchanged on exit.
98*
99* K (global output) INTEGER
100* On exit, this yields the bottom portion of the unreduced
101* submatrix. This will satisfy: L <= M <= I-1.
102*
103* SMLNUM (global input) REAL
104* On entry, a "small number" for the given matrix.
105* Unchanged on exit.
106*
107* BUF (local output) REAL array of size LWORK.
108*
109* LWORK (global input) INTEGER
110* On exit, LWORK is the size of the work buffer.
111* This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
112* LCM(NPROW,NPCOL) )
113* Here LCM is least common multiple, and NPROWxNPCOL is the
114* logical grid size.
115*
116* Notes:
117*
118* This routine does a global maximum and must be called by all
119* processes.
120*
121* This code is basically a parallelization of the following snip
122* of LAPACK code from SLAHQR:
123*
124* Look for a single small subdiagonal element.
125*
126* DO 20 K = I, L + 1, -1
127* TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
128* IF( TST1.EQ.ZERO )
129* $ TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
130* IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
131* $ GO TO 30
132* 20 CONTINUE
133* 30 CONTINUE
134*
135* Implemented by: G. Henry, November 17, 1996
136*
137* =====================================================================
138*
139* .. Parameters ..
140 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
141 $ LLD_, MB_, M_, NB_, N_, RSRC_
142 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
143 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
144 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
145 REAL ZERO
146 parameter( zero = 0.0e+0 )
147* ..
148* .. Local Scalars ..
149 INTEGER CONTXT, DOWN, HBL, IAFIRST, IBUF1, IBUF2,
150 $ ICOL1, ICOL2, II, III, IRCV1, IRCV2, IROW1,
151 $ IROW2, ISRC, ISTR1, ISTR2, ITMP1, ITMP2,
152 $ JAFIRST, JJ, JJJ, JSRC, LDA, LEFT, MODKM1,
153 $ MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT, UP
154 REAL H10, H11, H22, TST1, ULP
155* ..
156* .. External Functions ..
157 INTEGER ILCM, NUMROC
158 REAL PSLAMCH
159 EXTERNAL ilcm, numroc, pslamch
160* ..
161* .. External Subroutines ..
162 EXTERNAL blacs_gridinfo, sgerv2d, sgesd2d, igamx2d,
164* ..
165* .. Intrinsic Functions ..
166 INTRINSIC abs, max, mod
167* ..
168* .. Executable Statements ..
169*
170 hbl = desca( mb_ )
171 contxt = desca( ctxt_ )
172 lda = desca( lld_ )
173 iafirst = desca( rsrc_ )
174 jafirst = desca( csrc_ )
175 ulp = pslamch( contxt, 'PRECISION' )
176 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
177 left = mod( mycol+npcol-1, npcol )
178 right = mod( mycol+1, npcol )
179 up = mod( myrow+nprow-1, nprow )
180 down = mod( myrow+1, nprow )
181 num = nprow*npcol
182*
183* BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS
184* BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS
185*
186 istr1 = 0
187 istr2 = ( ( i-l ) / hbl )
188 IF( istr2*hbl.LT.( i-l ) )
189 $ istr2 = istr2 + 1
190 ii = istr2 / ilcm( nprow, npcol )
191 IF( ii*ilcm( nprow, npcol ).LT.istr2 ) THEN
192 istr2 = ii + 1
193 ELSE
194 istr2 = ii
195 END IF
196 IF( lwork.LT.2*istr2 ) THEN
197*
198* Error!
199*
200 RETURN
201 END IF
202 CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
203 $ icol1, ii, jj )
204 modkm1 = mod( i-1+hbl, hbl )
205*
206* COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
207* 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY
208* UP THE TRIDIAGONAL
209*
210 ibuf1 = 0
211 ibuf2 = 0
212 ircv1 = 0
213 ircv2 = 0
214 DO 10 k = i, l + 1, -1
215 IF( ( modkm1.EQ.0 ) .AND. ( down.EQ.ii ) .AND.
216 $ ( right.EQ.jj ) ) THEN
217*
218* WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN
219*
220 IF( ( down.NE.myrow ) .OR. ( right.NE.mycol ) ) THEN
221 CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow,
222 $ mycol, irow1, icol1, isrc, jsrc )
223 ibuf1 = ibuf1 + 1
224 buf( istr1+ibuf1 ) = a( ( icol1-1 )*lda+irow1 )
225 END IF
226 END IF
227 IF( ( modkm1.EQ.0 ) .AND. ( myrow.EQ.ii ) .AND.
228 $ ( right.EQ.jj ) ) THEN
229*
230* WE MUST PACK H(K ,K-1) AND SEND IT RIGHT
231*
232 IF( npcol.GT.1 ) THEN
233 CALL infog2l( k, k-1, desca, nprow, npcol, myrow, mycol,
234 $ irow1, icol1, isrc, jsrc )
235 ibuf2 = ibuf2 + 1
236 buf( istr2+ibuf2 ) = a( ( icol1-1 )*lda+irow1 )
237 END IF
238 END IF
239*
240* ADD UP THE RECEIVES
241*
242 IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
243 IF( ( modkm1.EQ.0 ) .AND. ( ( nprow.GT.1 ) .OR. ( npcol.GT.
244 $ 1 ) ) ) THEN
245*
246* WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP
247*
248 ircv1 = ircv1 + 1
249 END IF
250 IF( ( modkm1.EQ.0 ) .AND. ( npcol.GT.1 ) ) THEN
251*
252* WE MUST RECEIVE H(K ,K-1) FROM LEFT
253*
254 ircv2 = ircv2 + 1
255 END IF
256 END IF
257*
258* POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0)
259*
260 IF( modkm1.EQ.0 ) THEN
261 ii = ii - 1
262 jj = jj - 1
263 IF( ii.LT.0 )
264 $ ii = nprow - 1
265 IF( jj.LT.0 )
266 $ jj = npcol - 1
267 END IF
268 modkm1 = modkm1 - 1
269 IF( modkm1.LT.0 )
270 $ modkm1 = hbl - 1
271 10 CONTINUE
272*
273* SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
274*
275 IF( ibuf1.GT.0 ) THEN
276 CALL sgesd2d( contxt, ibuf1, 1, buf( istr1+1 ), ibuf1, down,
277 $ right )
278 END IF
279 IF( ibuf2.GT.0 ) THEN
280 CALL sgesd2d( contxt, ibuf2, 1, buf( istr2+1 ), ibuf2, myrow,
281 $ right )
282 END IF
283*
284* RECEIVE APPROPRIATE DATA IF THERE IS ANY
285*
286 IF( ircv1.GT.0 ) THEN
287 CALL sgerv2d( contxt, ircv1, 1, buf( istr1+1 ), ircv1, up,
288 $ left )
289 END IF
290 IF( ircv2.GT.0 ) THEN
291 CALL sgerv2d( contxt, ircv2, 1, buf( istr2+1 ), ircv2, myrow,
292 $ left )
293 END IF
294*
295* START MAIN LOOP
296*
297 ibuf1 = 0
298 ibuf2 = 0
299 CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
300 $ icol1, ii, jj )
301 modkm1 = mod( i-1+hbl, hbl )
302*
303* LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
304*
305* Start loop for subdiagonal search
306*
307 DO 40 k = i, l + 1, -1
308 IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
309 IF( modkm1.EQ.0 ) THEN
310*
311* Grab information from WORK array
312*
313 IF( num.GT.1 ) THEN
314 ibuf1 = ibuf1 + 1
315 h11 = buf( istr1+ibuf1 )
316 ELSE
317 h11 = a( ( icol1-2 )*lda+irow1-1 )
318 END IF
319 IF( npcol.GT.1 ) THEN
320 ibuf2 = ibuf2 + 1
321 h10 = buf( istr2+ibuf2 )
322 ELSE
323 h10 = a( ( icol1-2 )*lda+irow1 )
324 END IF
325 ELSE
326*
327* Information is local
328*
329 h11 = a( ( icol1-2 )*lda+irow1-1 )
330 h10 = a( ( icol1-2 )*lda+irow1 )
331 END IF
332 h22 = a( ( icol1-1 )*lda+irow1 )
333 tst1 = abs( h11 ) + abs( h22 )
334 IF( tst1.EQ.zero ) THEN
335*
336* FIND SOME NORM OF THE LOCAL H(L:I,L:I)
337*
338 CALL infog1l( l, hbl, nprow, myrow, iafirst, itmp1, iii )
339 irow2 = numroc( i, hbl, myrow, iafirst, nprow )
340 CALL infog1l( l, hbl, npcol, mycol, jafirst, itmp2, iii )
341 icol2 = numroc( i, hbl, mycol, jafirst, npcol )
342 DO 30 iii = itmp1, irow2
343 DO 20 jjj = itmp2, icol2
344 tst1 = tst1 + abs( a( ( jjj-1 )*lda+iii ) )
345 20 CONTINUE
346 30 CONTINUE
347 END IF
348 IF( abs( h10 ).LE.max( ulp*tst1, smlnum ) )
349 $ GO TO 50
350 irow1 = irow1 - 1
351 icol1 = icol1 - 1
352 END IF
353 modkm1 = modkm1 - 1
354 IF( modkm1.LT.0 )
355 $ modkm1 = hbl - 1
356 IF( ( modkm1.EQ.hbl-1 ) .AND. ( k.GT.2 ) ) THEN
357 ii = mod( ii+nprow-1, nprow )
358 jj = mod( jj+npcol-1, npcol )
359 CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow, mycol,
360 $ irow1, icol1, itmp1, itmp2 )
361 END IF
362 40 CONTINUE
363 50 CONTINUE
364 CALL igamx2d( contxt, 'ALL', ' ', 1, 1, k, 1, itmp1, itmp2, -1,
365 $ -1, -1 )
366 RETURN
367*
368* End of PSLASMSUB
369*
370 END
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
subroutine pslasmsub(a, desca, i, l, k, smlnum, buf, lwork)
Definition pslasmsub.f:2