SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pclasmsub.f
Go to the documentation of this file.
1 SUBROUTINE pclasmsub( A, DESCA, I, L, K, SMLNUM, BUF, LWORK )
2*
3* -- ScaLAPACK routine (version 1.7) --
4* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5* and University of California, Berkeley.
6* July 31, 2001
7*
8* .. Scalar Arguments ..
9 INTEGER I, K, L, LWORK
10 REAL SMLNUM
11* ..
12* .. Array Arguments ..
13 INTEGER DESCA( * )
14 COMPLEX A( * ), BUF( * )
15* ..
16*
17* Purpose
18* =======
19*
20* PCLASMSUB looks for a small subdiagonal element from the bottom
21* of the matrix that it can safely set to zero.
22*
23* Notes
24* =====
25*
26* Each global data object is described by an associated description
27* vector. This vector stores the information required to establish
28* the mapping between an object element and its corresponding process
29* and memory location.
30*
31* Let A be a generic term for any 2D block cyclicly distributed array.
32* Such a global array has an associated description vector DESCA.
33* In the following comments, the character _ should be read as
34* "of the global array".
35*
36* NOTATION STORED IN EXPLANATION
37* --------------- -------------- --------------------------------------
38* DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
39* DTYPE_A = 1.
40* CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41* the BLACS process grid A is distribu-
42* ted over. The context itself is glo-
43* bal, but the handle (the integer
44* value) may vary.
45* M_A (global) DESCA( M_ ) The number of rows in the global
46* array A.
47* N_A (global) DESCA( N_ ) The number of columns in the global
48* array A.
49* MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
50* the rows of the array.
51* NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
52* the columns of the array.
53* RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54* row of the array A is distributed.
55* CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56* first column of the array A is
57* distributed.
58* LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
59* array. LLD_A >= MAX(1,LOCr(M_A)).
60*
61* Let K be the number of rows or columns of a distributed matrix,
62* and assume that its process grid has dimension p x q.
63* LOCr( K ) denotes the number of elements of K that a process
64* would receive if K were distributed over the p processes of its
65* process column.
66* Similarly, LOCc( K ) denotes the number of elements of K that a
67* process would receive if K were distributed over the q processes of
68* its process row.
69* The values of LOCr() and LOCc() may be determined via a call to the
70* ScaLAPACK tool function, NUMROC:
71* LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72* LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73* An upper bound for these quantities may be computed by:
74* LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75* LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76*
77* Arguments
78* =========
79*
80* A (global input) COMPLEX array, dimension (DESCA(LLD_),*)
81* On entry, the Hessenberg matrix whose tridiagonal part is
82* being scanned.
83* Unchanged on exit.
84*
85* DESCA (global and local input) INTEGER array of dimension DLEN_.
86* The array descriptor for the distributed matrix A.
87*
88* I (global input) INTEGER
89* The global location of the bottom of the unreduced
90* submatrix of A.
91* Unchanged on exit.
92*
93* L (global input) INTEGER
94* The global location of the top of the unreduced submatrix
95* of A.
96* Unchanged on exit.
97*
98* K (global output) INTEGER
99* On exit, this yields the bottom portion of the unreduced
100* submatrix. This will satisfy: L <= M <= I-1.
101*
102* SMLNUM (global input) REAL
103* On entry, a "small number" for the given matrix.
104* Unchanged on exit.
105*
106* BUF (local output) COMPLEX array of size LWORK.
107*
108* LWORK (global input) INTEGER
109* On exit, LWORK is the size of the work buffer.
110* This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
111* LCM(NPROW,NPCOL) )
112* Here LCM is least common multiple, and NPROWxNPCOL is the
113* logical grid size.
114*
115* Notes:
116*
117* This routine does a global maximum and must be called by all
118* processes.
119*
120* This code is basically a parallelization of the following snip
121* of LAPACK code from CLAHQR:
122*
123* Look for a single small subdiagonal element.
124*
125* DO 20 K = I, L + 1, -1
126* TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
127* IF( TST1.EQ.ZERO )
128* $ TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
129* IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
130* $ GO TO 30
131* 20 CONTINUE
132* 30 CONTINUE
133*
134* Further Details
135* ===============
136*
137* Implemented by: M. Fahey, May 28, 1999
138*
139* =====================================================================
140*
141* .. Parameters ..
142 INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
143 $ LLD_, MB_, M_, NB_, N_, RSRC_
144 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
145 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
146 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
147 REAL ZERO
148 parameter( zero = 0.0e+0 )
149* ..
150* .. Local Scalars ..
151 INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2,
152 $ II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC,
153 $ ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA,
154 $ LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM,
155 $ RIGHT, UP
156 REAL TST1, ULP
157 COMPLEX CDUM, H10, H11, H22
158* ..
159* .. External Functions ..
160 INTEGER ILCM, NUMROC
161 REAL PSLAMCH
162 EXTERNAL ilcm, numroc, pslamch
163* ..
164* .. External Subroutines ..
165 EXTERNAL blacs_gridinfo, igamx2d, infog1l, infog2l,
166 $ cgerv2d, cgesd2d
167* ..
168* .. Intrinsic Functions ..
169 INTRINSIC abs, real, aimag, max, mod
170* ..
171* .. Statement Functions ..
172 REAL CABS1
173* ..
174* .. Statement Function definitions ..
175 cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
176* ..
177* .. Executable Statements ..
178*
179 hbl = desca( mb_ )
180 contxt = desca( ctxt_ )
181 lda = desca( lld_ )
182 ulp = pslamch( contxt, 'PRECISION' )
183 CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
184 left = mod( mycol+npcol-1, npcol )
185 right = mod( mycol+1, npcol )
186 up = mod( myrow+nprow-1, nprow )
187 down = mod( myrow+1, nprow )
188 num = nprow*npcol
189*
190* BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS
191* BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS
192*
193 istr1 = 0
194 istr2 = ( ( i-l ) / hbl )
195 IF( istr2*hbl.LT.( i-l ) )
196 $ istr2 = istr2 + 1
197 ii = istr2 / ilcm( nprow, npcol )
198 IF( ii*ilcm( nprow, npcol ).LT.istr2 ) THEN
199 istr2 = ii + 1
200 ELSE
201 istr2 = ii
202 END IF
203 IF( lwork.LT.2*istr2 ) THEN
204*
205* Error!
206*
207 RETURN
208 END IF
209 CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
210 $ icol1, ii, jj )
211 modkm1 = mod( i-1+hbl, hbl )
212*
213* COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
214* 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY
215* UP THE TRIDIAGONAL
216*
217 ibuf1 = 0
218 ibuf2 = 0
219 ircv1 = 0
220 ircv2 = 0
221 DO 10 k = i, l + 1, -1
222 IF( ( modkm1.EQ.0 ) .AND. ( down.EQ.ii ) .AND.
223 $ ( right.EQ.jj ) ) THEN
224*
225* WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN
226*
227 IF( ( down.NE.myrow ) .OR. ( right.NE.mycol ) ) THEN
228 CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow,
229 $ mycol, irow1, icol1, isrc, jsrc )
230 ibuf1 = ibuf1 + 1
231 buf( istr1+ibuf1 ) = a( ( icol1-1 )*lda+irow1 )
232 END IF
233 END IF
234 IF( ( modkm1.EQ.0 ) .AND. ( myrow.EQ.ii ) .AND.
235 $ ( right.EQ.jj ) ) THEN
236*
237* WE MUST PACK H(K ,K-1) AND SEND IT RIGHT
238*
239 IF( npcol.GT.1 ) THEN
240 CALL infog2l( k, k-1, desca, nprow, npcol, myrow, mycol,
241 $ irow1, icol1, isrc, jsrc )
242 ibuf2 = ibuf2 + 1
243 buf( istr2+ibuf2 ) = a( ( icol1-1 )*lda+irow1 )
244 END IF
245 END IF
246*
247* ADD UP THE RECEIVES
248*
249 IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
250 IF( ( modkm1.EQ.0 ) .AND. ( ( nprow.GT.1 ) .OR. ( npcol.GT.
251 $ 1 ) ) ) THEN
252*
253* WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP
254*
255 ircv1 = ircv1 + 1
256 END IF
257 IF( ( modkm1.EQ.0 ) .AND. ( npcol.GT.1 ) ) THEN
258*
259* WE MUST RECEIVE H(K ,K-1) FROM LEFT
260*
261 ircv2 = ircv2 + 1
262 END IF
263 END IF
264*
265* POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0)
266*
267 IF( modkm1.EQ.0 ) THEN
268 ii = ii - 1
269 jj = jj - 1
270 IF( ii.LT.0 )
271 $ ii = nprow - 1
272 IF( jj.LT.0 )
273 $ jj = npcol - 1
274 END IF
275 modkm1 = modkm1 - 1
276 IF( modkm1.LT.0 )
277 $ modkm1 = hbl - 1
278 10 CONTINUE
279*
280* SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
281*
282 IF( ibuf1.GT.0 ) THEN
283 CALL cgesd2d( contxt, ibuf1, 1, buf( istr1+1 ), ibuf1, down,
284 $ right )
285 END IF
286 IF( ibuf2.GT.0 ) THEN
287 CALL cgesd2d( contxt, ibuf2, 1, buf( istr2+1 ), ibuf2, myrow,
288 $ right )
289 END IF
290*
291* RECEIVE APPROPRIATE DATA IF THERE IS ANY
292*
293 IF( ircv1.GT.0 ) THEN
294 CALL cgerv2d( contxt, ircv1, 1, buf( istr1+1 ), ircv1, up,
295 $ left )
296 END IF
297 IF( ircv2.GT.0 ) THEN
298 CALL cgerv2d( contxt, ircv2, 1, buf( istr2+1 ), ircv2, myrow,
299 $ left )
300 END IF
301*
302* START MAIN LOOP
303*
304 ibuf1 = 0
305 ibuf2 = 0
306 CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
307 $ icol1, ii, jj )
308 modkm1 = mod( i-1+hbl, hbl )
309*
310* LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
311*
312* Start loop for subdiagonal search
313*
314 DO 40 k = i, l + 1, -1
315 IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
316 IF( modkm1.EQ.0 ) THEN
317*
318* Grab information from WORK array
319*
320 IF( num.GT.1 ) THEN
321 ibuf1 = ibuf1 + 1
322 h11 = buf( istr1+ibuf1 )
323 ELSE
324 h11 = a( ( icol1-2 )*lda+irow1-1 )
325 END IF
326 IF( npcol.GT.1 ) THEN
327 ibuf2 = ibuf2 + 1
328 h10 = buf( istr2+ibuf2 )
329 ELSE
330 h10 = a( ( icol1-2 )*lda+irow1 )
331 END IF
332 ELSE
333*
334* Information is local
335*
336 h11 = a( ( icol1-2 )*lda+irow1-1 )
337 h10 = a( ( icol1-2 )*lda+irow1 )
338 END IF
339 h22 = a( ( icol1-1 )*lda+irow1 )
340 tst1 = cabs1( h11 ) + cabs1( h22 )
341 IF( tst1.EQ.zero ) THEN
342*
343* FIND SOME NORM OF THE LOCAL H(L:I,L:I)
344*
345 CALL infog1l( l, hbl, nprow, myrow, 0, itmp1, iii )
346 irow2 = numroc( i, hbl, myrow, 0, nprow )
347 CALL infog1l( l, hbl, npcol, mycol, 0, itmp2, iii )
348 icol2 = numroc( i, hbl, mycol, 0, npcol )
349 DO 30 iii = itmp1, irow2
350 DO 20 jjj = itmp2, icol2
351 tst1 = tst1 + cabs1( a( ( jjj-1 )*lda+iii ) )
352 20 CONTINUE
353 30 CONTINUE
354 END IF
355 IF( cabs1( h10 ).LE.max( ulp*tst1, smlnum ) )
356 $ GO TO 50
357 irow1 = irow1 - 1
358 icol1 = icol1 - 1
359 END IF
360 modkm1 = modkm1 - 1
361 IF( modkm1.LT.0 )
362 $ modkm1 = hbl - 1
363 IF( ( modkm1.EQ.hbl-1 ) .AND. ( k.GT.2 ) ) THEN
364 ii = mod( ii+nprow-1, nprow )
365 jj = mod( jj+npcol-1, npcol )
366 CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow, mycol,
367 $ irow1, icol1, itmp1, itmp2 )
368 END IF
369 40 CONTINUE
370 50 CONTINUE
371 CALL igamx2d( contxt, 'ALL', ' ', 1, 1, k, 1, itmp1, itmp2, -1,
372 $ -1, -1 )
373 RETURN
374*
375* End of PCLASMSUB
376*
377 END
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
subroutine infog2l(grindx, gcindx, desc, nprow, npcol, myrow, mycol, lrindx, lcindx, rsrc, csrc)
Definition infog2l.f:3
#define max(A, B)
Definition pcgemr.c:180
subroutine pclasmsub(a, desca, i, l, k, smlnum, buf, lwork)
Definition pclasmsub.f:2