ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslasmsub.f
Go to the documentation of this file.
1  SUBROUTINE pslasmsub( A, DESCA, I, L, K, SMLNUM, BUF, LWORK )
2 *
3 * -- ScaLAPACK routine (version 1.7) --
4 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
5 * and University of California, Berkeley.
6 * May 1, 1997
7 *
8 * .. Scalar Arguments ..
9  INTEGER I, K, L, LWORK
10  REAL SMLNUM
11 * ..
12 * .. Array Arguments ..
13  INTEGER DESCA( * )
14  REAL A( * ), BUF( * )
15 * ..
16 *
17 * Purpose
18 * =======
19 *
20 * PSLASMSUB looks for a small subdiagonal element from the bottom
21 * of the matrix that it can safely set to zero.
22 *
23 * Notes
24 * =====
25 *
26 * Each global data object is described by an associated description
27 * vector. This vector stores the information required to establish
28 * the mapping between an object element and its corresponding process
29 * and memory location.
30 *
31 * Let A be a generic term for any 2D block cyclicly distributed array.
32 * Such a global array has an associated description vector DESCA.
33 * In the following comments, the character _ should be read as
34 * "of the global array".
35 *
36 * NOTATION STORED IN EXPLANATION
37 * --------------- -------------- --------------------------------------
38 * DTYPE_A(global) DESCA( DTYPE_ )The descriptor type. In this case,
39 * DTYPE_A = 1.
40 * CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating
41 * the BLACS process grid A is distribu-
42 * ted over. The context itself is glo-
43 * bal, but the handle (the integer
44 * value) may vary.
45 * M_A (global) DESCA( M_ ) The number of rows in the global
46 * array A.
47 * N_A (global) DESCA( N_ ) The number of columns in the global
48 * array A.
49 * MB_A (global) DESCA( MB_ ) The blocking factor used to distribute
50 * the rows of the array.
51 * NB_A (global) DESCA( NB_ ) The blocking factor used to distribute
52 * the columns of the array.
53 * RSRC_A (global) DESCA( RSRC_ ) The process row over which the first
54 * row of the array A is distributed.
55 * CSRC_A (global) DESCA( CSRC_ ) The process column over which the
56 * first column of the array A is
57 * distributed.
58 * LLD_A (local) DESCA( LLD_ ) The leading dimension of the local
59 * array. LLD_A >= MAX(1,LOCr(M_A)).
60 *
61 * Let K be the number of rows or columns of a distributed matrix,
62 * and assume that its process grid has dimension p x q.
63 * LOCr( K ) denotes the number of elements of K that a process
64 * would receive if K were distributed over the p processes of its
65 * process column.
66 * Similarly, LOCc( K ) denotes the number of elements of K that a
67 * process would receive if K were distributed over the q processes of
68 * its process row.
69 * The values of LOCr() and LOCc() may be determined via a call to the
70 * ScaLAPACK tool function, NUMROC:
71 * LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ),
72 * LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ).
73 * An upper bound for these quantities may be computed by:
74 * LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A
75 * LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A
76 *
77 * Arguments
78 * =========
79 *
80 * A (global input) REAL array, dimension
81 * (DESCA(LLD_),*)
82 * On entry, the Hessenberg matrix whose tridiagonal part is
83 * being scanned.
84 * Unchanged on exit.
85 *
86 * DESCA (global and local input) INTEGER array of dimension DLEN_.
87 * The array descriptor for the distributed matrix A.
88 *
89 * I (global input) INTEGER
90 * The global location of the bottom of the unreduced
91 * submatrix of A.
92 * Unchanged on exit.
93 *
94 * L (global input) INTEGER
95 * The global location of the top of the unreduced submatrix
96 * of A.
97 * Unchanged on exit.
98 *
99 * K (global output) INTEGER
100 * On exit, this yields the bottom portion of the unreduced
101 * submatrix. This will satisfy: L <= M <= I-1.
102 *
103 * SMLNUM (global input) REAL
104 * On entry, a "small number" for the given matrix.
105 * Unchanged on exit.
106 *
107 * BUF (local output) REAL array of size LWORK.
108 *
109 * LWORK (global input) INTEGER
110 * On exit, LWORK is the size of the work buffer.
111 * This must be at least 2*Ceil( Ceil( (I-L)/HBL ) /
112 * LCM(NPROW,NPCOL) )
113 * Here LCM is least common multiple, and NPROWxNPCOL is the
114 * logical grid size.
115 *
116 * Notes:
117 *
118 * This routine does a global maximum and must be called by all
119 * processes.
120 *
121 * This code is basically a parallelization of the following snip
122 * of LAPACK code from SLAHQR:
123 *
124 * Look for a single small subdiagonal element.
125 *
126 * DO 20 K = I, L + 1, -1
127 * TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
128 * IF( TST1.EQ.ZERO )
129 * $ TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
130 * IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
131 * $ GO TO 30
132 * 20 CONTINUE
133 * 30 CONTINUE
134 *
135 * Implemented by: G. Henry, November 17, 1996
136 *
137 * =====================================================================
138 *
139 * .. Parameters ..
140  INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DTYPE_,
141  $ LLD_, MB_, M_, NB_, N_, RSRC_
142  parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
143  $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
144  $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
145  REAL ZERO
146  parameter( zero = 0.0e+0 )
147 * ..
148 * .. Local Scalars ..
149  INTEGER CONTXT, DOWN, HBL, IAFIRST, IBUF1, IBUF2,
150  $ ICOL1, ICOL2, II, III, IRCV1, IRCV2, IROW1,
151  $ IROW2, ISRC, ISTR1, ISTR2, ITMP1, ITMP2,
152  $ JAFIRST, JJ, JJJ, JSRC, LDA, LEFT, MODKM1,
153  $ MYCOL, MYROW, NPCOL, NPROW, NUM, RIGHT, UP
154  REAL H10, H11, H22, TST1, ULP
155 * ..
156 * .. External Functions ..
157  INTEGER ILCM, NUMROC
158  REAL PSLAMCH
159  EXTERNAL ilcm, numroc, pslamch
160 * ..
161 * .. External Subroutines ..
162  EXTERNAL blacs_gridinfo, sgerv2d, sgesd2d, igamx2d,
163  $ infog1l, infog2l
164 * ..
165 * .. Intrinsic Functions ..
166  INTRINSIC abs, max, mod
167 * ..
168 * .. Executable Statements ..
169 *
170  hbl = desca( mb_ )
171  contxt = desca( ctxt_ )
172  lda = desca( lld_ )
173  iafirst = desca( rsrc_ )
174  jafirst = desca( csrc_ )
175  ulp = pslamch( contxt, 'PRECISION' )
176  CALL blacs_gridinfo( contxt, nprow, npcol, myrow, mycol )
177  left = mod( mycol+npcol-1, npcol )
178  right = mod( mycol+1, npcol )
179  up = mod( myrow+nprow-1, nprow )
180  down = mod( myrow+1, nprow )
181  num = nprow*npcol
182 *
183 * BUFFER1 STARTS AT BUF(ISTR1+1) AND WILL CONTAINS IBUF1 ELEMENTS
184 * BUFFER2 STARTS AT BUF(ISTR2+1) AND WILL CONTAINS IBUF2 ELEMENTS
185 *
186  istr1 = 0
187  istr2 = ( ( i-l ) / hbl )
188  IF( istr2*hbl.LT.( i-l ) )
189  $ istr2 = istr2 + 1
190  ii = istr2 / ilcm( nprow, npcol )
191  IF( ii*ilcm( nprow, npcol ).LT.istr2 ) THEN
192  istr2 = ii + 1
193  ELSE
194  istr2 = ii
195  END IF
196  IF( lwork.LT.2*istr2 ) THEN
197 *
198 * Error!
199 *
200  RETURN
201  END IF
202  CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
203  $ icol1, ii, jj )
204  modkm1 = mod( i-1+hbl, hbl )
205 *
206 * COPY OUR RELEVANT PIECES OF TRIADIAGONAL THAT WE OWE INTO
207 * 2 BUFFERS TO SEND TO WHOMEVER OWNS H(K,K) AS K MOVES DIAGONALLY
208 * UP THE TRIDIAGONAL
209 *
210  ibuf1 = 0
211  ibuf2 = 0
212  ircv1 = 0
213  ircv2 = 0
214  DO 10 k = i, l + 1, -1
215  IF( ( modkm1.EQ.0 ) .AND. ( down.EQ.ii ) .AND.
216  $ ( right.EQ.jj ) ) THEN
217 *
218 * WE MUST PACK H(K-1,K-1) AND SEND IT DIAGONAL DOWN
219 *
220  IF( ( down.NE.myrow ) .OR. ( right.NE.mycol ) ) THEN
221  CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow,
222  $ mycol, irow1, icol1, isrc, jsrc )
223  ibuf1 = ibuf1 + 1
224  buf( istr1+ibuf1 ) = a( ( icol1-1 )*lda+irow1 )
225  END IF
226  END IF
227  IF( ( modkm1.EQ.0 ) .AND. ( myrow.EQ.ii ) .AND.
228  $ ( right.EQ.jj ) ) THEN
229 *
230 * WE MUST PACK H(K ,K-1) AND SEND IT RIGHT
231 *
232  IF( npcol.GT.1 ) THEN
233  CALL infog2l( k, k-1, desca, nprow, npcol, myrow, mycol,
234  $ irow1, icol1, isrc, jsrc )
235  ibuf2 = ibuf2 + 1
236  buf( istr2+ibuf2 ) = a( ( icol1-1 )*lda+irow1 )
237  END IF
238  END IF
239 *
240 * ADD UP THE RECEIVES
241 *
242  IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
243  IF( ( modkm1.EQ.0 ) .AND. ( ( nprow.GT.1 ) .OR. ( npcol.GT.
244  $ 1 ) ) ) THEN
245 *
246 * WE MUST RECEIVE H(K-1,K-1) FROM DIAGONAL UP
247 *
248  ircv1 = ircv1 + 1
249  END IF
250  IF( ( modkm1.EQ.0 ) .AND. ( npcol.GT.1 ) ) THEN
251 *
252 * WE MUST RECEIVE H(K ,K-1) FROM LEFT
253 *
254  ircv2 = ircv2 + 1
255  END IF
256  END IF
257 *
258 * POSSIBLY CHANGE OWNERS (OCCURS ONLY WHEN MOD(K-1,HBL) = 0)
259 *
260  IF( modkm1.EQ.0 ) THEN
261  ii = ii - 1
262  jj = jj - 1
263  IF( ii.LT.0 )
264  $ ii = nprow - 1
265  IF( jj.LT.0 )
266  $ jj = npcol - 1
267  END IF
268  modkm1 = modkm1 - 1
269  IF( modkm1.LT.0 )
270  $ modkm1 = hbl - 1
271  10 CONTINUE
272 *
273 * SEND DATA ON TO THE APPROPRIATE NODE IF THERE IS ANY DATA TO SEND
274 *
275  IF( ibuf1.GT.0 ) THEN
276  CALL sgesd2d( contxt, ibuf1, 1, buf( istr1+1 ), ibuf1, down,
277  $ right )
278  END IF
279  IF( ibuf2.GT.0 ) THEN
280  CALL sgesd2d( contxt, ibuf2, 1, buf( istr2+1 ), ibuf2, myrow,
281  $ right )
282  END IF
283 *
284 * RECEIVE APPROPRIATE DATA IF THERE IS ANY
285 *
286  IF( ircv1.GT.0 ) THEN
287  CALL sgerv2d( contxt, ircv1, 1, buf( istr1+1 ), ircv1, up,
288  $ left )
289  END IF
290  IF( ircv2.GT.0 ) THEN
291  CALL sgerv2d( contxt, ircv2, 1, buf( istr2+1 ), ircv2, myrow,
292  $ left )
293  END IF
294 *
295 * START MAIN LOOP
296 *
297  ibuf1 = 0
298  ibuf2 = 0
299  CALL infog2l( i, i, desca, nprow, npcol, myrow, mycol, irow1,
300  $ icol1, ii, jj )
301  modkm1 = mod( i-1+hbl, hbl )
302 *
303 * LOOK FOR A SINGLE SMALL SUBDIAGONAL ELEMENT.
304 *
305 * Start loop for subdiagonal search
306 *
307  DO 40 k = i, l + 1, -1
308  IF( ( myrow.EQ.ii ) .AND. ( mycol.EQ.jj ) ) THEN
309  IF( modkm1.EQ.0 ) THEN
310 *
311 * Grab information from WORK array
312 *
313  IF( num.GT.1 ) THEN
314  ibuf1 = ibuf1 + 1
315  h11 = buf( istr1+ibuf1 )
316  ELSE
317  h11 = a( ( icol1-2 )*lda+irow1-1 )
318  END IF
319  IF( npcol.GT.1 ) THEN
320  ibuf2 = ibuf2 + 1
321  h10 = buf( istr2+ibuf2 )
322  ELSE
323  h10 = a( ( icol1-2 )*lda+irow1 )
324  END IF
325  ELSE
326 *
327 * Information is local
328 *
329  h11 = a( ( icol1-2 )*lda+irow1-1 )
330  h10 = a( ( icol1-2 )*lda+irow1 )
331  END IF
332  h22 = a( ( icol1-1 )*lda+irow1 )
333  tst1 = abs( h11 ) + abs( h22 )
334  IF( tst1.EQ.zero ) THEN
335 *
336 * FIND SOME NORM OF THE LOCAL H(L:I,L:I)
337 *
338  CALL infog1l( l, hbl, nprow, myrow, iafirst, itmp1, iii )
339  irow2 = numroc( i, hbl, myrow, iafirst, nprow )
340  CALL infog1l( l, hbl, npcol, mycol, jafirst, itmp2, iii )
341  icol2 = numroc( i, hbl, mycol, jafirst, npcol )
342  DO 30 iii = itmp1, irow2
343  DO 20 jjj = itmp2, icol2
344  tst1 = tst1 + abs( a( ( jjj-1 )*lda+iii ) )
345  20 CONTINUE
346  30 CONTINUE
347  END IF
348  IF( abs( h10 ).LE.max( ulp*tst1, smlnum ) )
349  $ GO TO 50
350  irow1 = irow1 - 1
351  icol1 = icol1 - 1
352  END IF
353  modkm1 = modkm1 - 1
354  IF( modkm1.LT.0 )
355  $ modkm1 = hbl - 1
356  IF( ( modkm1.EQ.hbl-1 ) .AND. ( k.GT.2 ) ) THEN
357  ii = mod( ii+nprow-1, nprow )
358  jj = mod( jj+npcol-1, npcol )
359  CALL infog2l( k-1, k-1, desca, nprow, npcol, myrow, mycol,
360  $ irow1, icol1, itmp1, itmp2 )
361  END IF
362  40 CONTINUE
363  50 CONTINUE
364  CALL igamx2d( contxt, 'ALL', ' ', 1, 1, k, 1, itmp1, itmp2, -1,
365  $ -1, -1 )
366  RETURN
367 *
368 * End of PSLASMSUB
369 *
370  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
infog2l
subroutine infog2l(GRINDX, GCINDX, DESC, NPROW, NPCOL, MYROW, MYCOL, LRINDX, LCINDX, RSRC, CSRC)
Definition: infog2l.f:3
pslasmsub
subroutine pslasmsub(A, DESCA, I, L, K, SMLNUM, BUF, LWORK)
Definition: pslasmsub.f:2