ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pzlasmsub.f
Go to the documentation of this file.
1  SUBROUTINE pzlasmsub( A, DESCA, I, L, K, SMLNUM, BUF, LWORK )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * July 31, 2001
7 *
8 * .. Scalar Arguments ..
9  INTEGER I, K, L, LWORK
10  DOUBLE PRECISION SMLNUM
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  COMPLEX*16 A( * ), BUF( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PZLASMSUB looks for a small subdiagonal element from the bottom
21 * of the matrix that it can safely set to zero.
22 *
23 * Notes
24 * =====
25 *
26 * Each global data object is described by an associated description
27 * vector. This vector stores the information required to establish
28 * the mapping between an object element and its corresponding process
29 * and memory location.
30 *
31 * Let A be a generic term for any 2D block cyclicly distributed array.
32 * Such a global array has an associated description vector DESCA.
33 * In the following comments, the character _ should be read as
34 * "of the global array".
35 *
36 * NOTATION STORED IN EXPLANATION
37 * --------------- -------------- --------------------------------------
38 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
39 * DTYPE_A = 1.
40 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41 * the BLACS process grid A is distribu-
42 * ted over. The context itself is glo-
43 * bal, but the handle (the integer
44 * value) may vary.
45 * M_A (global) DESCA( M_ ) The number of rows in the global
46 * array A.
47 * N_A (global) DESCA( N_ ) The number of columns in the global
48 * array A.
49 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
50 * the rows of the array.
51 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
52 * the columns of the array.
53 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54 * row of the array A is distributed.
55 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56 * first column of the array A is
57 * distributed.
58 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
59 * array. LLD_A >= MAX(1,LOCr(M_A)).
60 *
61 * Let K be the number of rows or columns of a distributed matrix,
62 * and assume that its process grid has dimension p x q.
63 * LOCr( K ) denotes the number of elements of K that a process
64 * would receive if K were distributed over the p processes of its
65 * process column.
66 * Similarly, LOCc( K ) denotes the number of elements of K that a
67 * process would receive if K were distributed over the q processes of
68 * its process row.
69 * The values of LOCr() and LOCc() may be determined via a call to the
70 * ScaLAPACK tool function, NUMROC:
71 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73 * An upper bound for these quantities may be computed by:
74 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76 *
77 * Arguments
78 * =========
79 *
80 * A (global input) COMPLEX*16 array, dimension (DESCA(LLD_),*)
81 * On entry, the Hessenberg matrix whose tridiagonal part is
82 * being scanned.
83 * Unchanged on exit.
84 *
85 * DESCA (global and local input) INTEGER array of dimension DLEN_.
86 * The array descriptor for the distributed matrix A.
87 *
88 * I (global input) INTEGER
89 * The global location of the bottom of the unreduced
90 * submatrix of A.
91 * Unchanged on exit.
92 *
93 * L (global input) INTEGER
94 * The global location of the top of the unreduced submatrix
95 * of A.
96 * Unchanged on exit.
97 *
98 * K (global output) INTEGER
99 * On exit, this yields the bottom portion of the unreduced
100 * submatrix. This will satisfy: L <= M <= I-1.
101 *
102 * SMLNUM (global input) DOUBLE PRECISION
103 * On entry, a "small number" for the given matrix.
104 * Unchanged on exit.
105 *
106 * BUF (local output) COMPLEX*16 array of size LWORK.
107 *
108 * LWORK (global input) INTEGER
109 * On exit, LWORK is the size of the work buffer.
110 * This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
111 * LCM(NPROW,NPCOL) )
112 * Here LCM is least common multiple, and NPROWxNPCOL is the
113 * logical grid size.
114 *
115 * Notes:
116 *
117 * This routine does a global maximum and must be called by all
118 * processes.
119 *
120 * This code is basically a parallelization of the following snip
121 * of LAPACK code from ZLAHQR:
122 *
123 * Look for a single small subdiagonal element.
124 *
125 * DO 20 K = I, L + 1, -1
126 * TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
127 * IF( TST1.EQ.ZERO )
128 * $ TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
129 * IF( CABS1( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
130 * $ GO TO 30
131 * 20 CONTINUE
132 * 30 CONTINUE
133 *
134 * Further Details
135 * ===============
136 *
137 * Implemented by: M. Fahey, May 28, 1999
138 *
139 * =====================================================================
140 *
141 * .. Parameters ..
142  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
143  $ LLD_, MB_, M_, NB_, N_, RSRC_
144  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
145  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
146  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
147  DOUBLE PRECISION ZERO
148  parameter( zero = 0.0d+0 )
149 * ..
150 * .. Local Scalars ..
151  INTEGER CONTXT, DOWN, HBL, IBUF1, IBUF2, ICOL1, ICOL2,
152  $ II, III, IRCV1, IRCV2, IROW1, IROW2, ISRC,
153  $ ISTR1, ISTR2, ITMP1, ITMP2, JJ, JJJ, JSRC, LDA,
154  $ LEFT, MODKM1, MYCOL, MYROW, NPCOL, NPROW, NUM,
155  $ RIGHT, UP
156  DOUBLE PRECISION TST1, ULP
157  COMPLEX*16 CDUM, H10, H11, H22
158 * ..
159 * .. External Functions ..
160  INTEGER ILCM, NUMROC
161  DOUBLE PRECISION PDLAMCH
162  EXTERNAL ilcm, numroc, pdlamch
163 * ..
164 * .. External Subroutines ..
165  EXTERNAL blacs_gridinfo, igamx2d, infog1l, infog2l,
166  $ zgerv2d, zgesd2d
167 * ..
168 * .. Intrinsic Functions ..
169  INTRINSIC abs, dble, dimag, max, mod
170 * ..
171 * .. Statement Functions ..
172  DOUBLE PRECISION CABS1
173 * ..
174 * .. Statement Function definitions ..
175  cabs1( cdum ) = abs( dble( cdum ) ) + abs( dimag( cdum ) )
176 * ..
177 * .. Executable Statements ..
178 *
179  hbl = desca( mb_ )
180  contxt = desca( ctxt_ )
181  lda = desca( lld_ )
182  ulp = pdlamch( contxt, 'PRECISION' )
183  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
184  left = mod( mycol+npcol-1, npcol )
185  right = mod( mycol+1, npcol )
186  up = mod( myrow+nprow-1, nprow )
187  down = mod( myrow+1, nprow )
188  num = nprow*npcol
189 *
190 * BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS
191 * BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS
192 *
193  istr1 = 0
194  istr2 = ( ( i-l ) / hbl )
195  IF( istr2*hbl.LT.( i-l ) )
196  $ istr2 = istr2 + 1
197  ii = istr2 / ilcm( nprow, npcol )
198  IF( ii*ilcm( nprow, npcol ).LT.istr2 ) THEN
199  istr2 = ii + 1
200  ELSE
201  istr2 = ii
202  END IF
203  IF( lwork.LT.2*istr2 ) THEN
204 *
205 * Error!
206 *
207  RETURN
208  END IF
209  CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
210  $ icol1, ii, jj )
211  modkm1 = mod( i-1+hbl, hbl )
212 *
213 * COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
214 * 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY
215 * UP THE TRIDIAGONAL
216 *
217  ibuf1 = 0
218  ibuf2 = 0
219  ircv1 = 0
220  ircv2 = 0
221  DO 10 k = i, l + 1, -1
222  IF( ( modkm1.EQ.0 ) .AND. ( down.EQ.ii ) .AND.
223  $ ( right.EQ.jj ) ) THEN
224 *
225 * WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN
226 *
227  IF( ( down.NE.myrow ) .OR. ( right.NE.mycol ) ) THEN
228  CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow,
229  $ mycol, irow1, icol1, isrc, jsrc )
230  ibuf1 = ibuf1 + 1
231  buf( istr1+ibuf1 ) = a( ( icol1-1 )*lda+irow1 )
232  END IF
233  END IF
234  IF( ( modkm1.EQ.0 ) .AND. ( myrow.EQ.ii ) .AND.
235  $ ( right.EQ.jj ) ) THEN
236 *
237 * WE MUST PACK H(K ,K-1) AND SEND IT RIGHT
238 *
239  IF( npcol.GT.1 ) THEN
240  CALL infog2l( k, k-1, desca, nprow, npcol, myrow, mycol,
241  $ irow1, icol1, isrc, jsrc )
242  ibuf2 = ibuf2 + 1
243  buf( istr2+ibuf2 ) = a( ( icol1-1 )*lda+irow1 )
244  END IF
245  END IF
246 *
247 * ADD UP THE RECEIVES
248 *
249  IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
250  IF( ( modkm1.EQ.0 ) .AND. ( ( nprow.GT.1 ) .OR. ( npcol.GT.
251  $ 1 ) ) ) THEN
252 *
253 * WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP
254 *
255  ircv1 = ircv1 + 1
256  END IF
257  IF( ( modkm1.EQ.0 ) .AND. ( npcol.GT.1 ) ) THEN
258 *
259 * WE MUST RECEIVE H(K ,K-1) FROM LEFT
260 *
261  ircv2 = ircv2 + 1
262  END IF
263  END IF
264 *
265 * POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0)
266 *
267  IF( modkm1.EQ.0 ) THEN
268  ii = ii - 1
269  jj = jj - 1
270  IF( ii.LT.0 )
271  $ ii = nprow - 1
272  IF( jj.LT.0 )
273  $ jj = npcol - 1
274  END IF
275  modkm1 = modkm1 - 1
276  IF( modkm1.LT.0 )
277  $ modkm1 = hbl - 1
278  10 CONTINUE
279 *
280 * SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
281 *
282  IF( ibuf1.GT.0 ) THEN
283  CALL zgesd2d( contxt, ibuf1, 1, buf( istr1+1 ), ibuf1, down,
284  $ right )
285  END IF
286  IF( ibuf2.GT.0 ) THEN
287  CALL zgesd2d( contxt, ibuf2, 1, buf( istr2+1 ), ibuf2, myrow,
288  $ right )
289  END IF
290 *
291 * RECEIVE APPROPRIATE DATA IF THERE IS ANY
292 *
293  IF( ircv1.GT.0 ) THEN
294  CALL zgerv2d( contxt, ircv1, 1, buf( istr1+1 ), ircv1, up,
295  $ left )
296  END IF
297  IF( ircv2.GT.0 ) THEN
298  CALL zgerv2d( contxt, ircv2, 1, buf( istr2+1 ), ircv2, myrow,
299  $ left )
300  END IF
301 *
302 * START MAIN LOOP
303 *
304  ibuf1 = 0
305  ibuf2 = 0
306  CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
307  $ icol1, ii, jj )
308  modkm1 = mod( i-1+hbl, hbl )
309 *
310 * LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
311 *
312 * Start loop for subdiagonal search
313 *
314  DO 40 k = i, l + 1, -1
315  IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
316  IF( modkm1.EQ.0 ) THEN
317 *
318 * Grab information from WORK array
319 *
320  IF( num.GT.1 ) THEN
321  ibuf1 = ibuf1 + 1
322  h11 = buf( istr1+ibuf1 )
323  ELSE
324  h11 = a( ( icol1-2 )*lda+irow1-1 )
325  END IF
326  IF( npcol.GT.1 ) THEN
327  ibuf2 = ibuf2 + 1
328  h10 = buf( istr2+ibuf2 )
329  ELSE
330  h10 = a( ( icol1-2 )*lda+irow1 )
331  END IF
332  ELSE
333 *
334 * Information is local
335 *
336  h11 = a( ( icol1-2 )*lda+irow1-1 )
337  h10 = a( ( icol1-2 )*lda+irow1 )
338  END IF
339  h22 = a( ( icol1-1 )*lda+irow1 )
340  tst1 = cabs1( h11 ) + cabs1( h22 )
341  IF( tst1.EQ.zero ) THEN
342 *
343 * FIND SOME NORM OF THE LOCAL H(L:I,L:I)
344 *
345  CALL infog1l( l, hbl, nprow, myrow, 0, itmp1, iii )
346  irow2 = numroc( i, hbl, myrow, 0, nprow )
347  CALL infog1l( l, hbl, npcol, mycol, 0, itmp2, iii )
348  icol2 = numroc( i, hbl, mycol, 0, npcol )
349  DO 30 iii = itmp1, irow2
350  DO 20 jjj = itmp2, icol2
351  tst1 = tst1 + cabs1( a( ( jjj-1 )*lda+iii ) )
352  20 CONTINUE
353  30 CONTINUE
354  END IF
355  IF( cabs1( h10 ).LE.max( ulp*tst1, smlnum ) )
356  $ GO TO 50
357  irow1 = irow1 - 1
358  icol1 = icol1 - 1
359  END IF
360  modkm1 = modkm1 - 1
361  IF( modkm1.LT.0 )
362  $ modkm1 = hbl - 1
363  IF( ( modkm1.EQ.hbl-1 ) .AND. ( k.GT.2 ) ) THEN
364  ii = mod( ii+nprow-1, nprow )
365  jj = mod( jj+npcol-1, npcol )
366  CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow, mycol,
367  $ irow1, icol1, itmp1, itmp2 )
368  END IF
369  40 CONTINUE
370  50 CONTINUE
371  CALL igamx2d( contxt, 'ALL', ' ', 1, 1, k, 1, itmp1, itmp2, -1,
372  $ -1, -1 )
373  RETURN
374 *
375 * End of PZLASMSUB
376 *
377  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pzlasmsub
subroutine pzlasmsub(A, DESCA, I, L, K, SMLNUM, BUF, LWORK)
Definition: pzlasmsub.f:2