SCALAPACK 2.2.2
LAPACK: Linear Algebra PACKage
All Classes Files Functions Variables Typedefs Macros
pslaed2.f
Go to the documentation of this file.
1 SUBROUTINE pslaed2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ,
2 $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM,
3 $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN,
4 $ NN1, NN2, IB1, IB2 )
5*
6* -- ScaLAPACK auxiliary routine (version 1.7) --
7* University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8* and University of California, Berkeley.
9* December 31, 1998
10*
11* .. Scalar Arguments ..
12 INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
13 $ N1, NB, NN, NN1, NN2, NPCOL
14 REAL RHO
15* ..
16* .. Array Arguments ..
17 INTEGER COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
18 $ INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
19 $ PSM( 0: NPCOL-1, 4 )
20 REAL D( * ), DLAMDA( * ), Q( LDQ, * ),
21 $ Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
22* ..
23*
24* Purpose
25* =======
26*
27* PSLAED2 sorts the two sets of eigenvalues together into a single
28* sorted set. Then it tries to deflate the size of the problem.
29* There are two ways in which deflation can occur: when two or more
30* eigenvalues are close together or if there is a tiny entry in the
31* Z vector. For each such occurrence the order of the related secular
32* equation problem is reduced by one.
33*
34* Arguments
35* =========
36*
37* ICTXT (global input) INTEGER
38* The BLACS context handle, indicating the global context of
39* the operation on the matrix. The context itself is global.
40*
41* K (output) INTEGER
42* The number of non-deflated eigenvalues, and the order of the
43* related secular equation. 0 <= K <=N.
44*
45* N (input) INTEGER
46* The dimension of the symmetric tridiagonal matrix. N >= 0.
47*
48* N1 (input) INTEGER
49* The location of the last eigenvalue in the leading sub-matrix.
50* min(1,N) < N1 < N.
51*
52* NB (global input) INTEGER
53* The blocking factor used to distribute the columns of the
54* matrix. NB >= 1.
55*
56* D (input/output) REAL array, dimension (N)
57* On entry, D contains the eigenvalues of the two submatrices to
58* be combined.
59* On exit, D contains the trailing (N-K) updated eigenvalues
60* (those which were deflated) sorted into increasing order.
61*
62* DROW (global input) INTEGER
63* The process row over which the first row of the matrix D is
64* distributed. 0 <= DROW < NPROW.
65*
66* DCOL (global input) INTEGER
67* The process column over which the first column of the
68* matrix D is distributed. 0 <= DCOL < NPCOL.
69*
70* Q (input/output) REAL array, dimension (LDQ, N)
71* On entry, Q contains the eigenvectors of two submatrices in
72* the two square blocks with corners at (1,1), (N1,N1)
73* and (N1+1, N1+1), (N,N).
74* On exit, Q contains the trailing (N-K) updated eigenvectors
75* (those which were deflated) in its last N-K columns.
76*
77* LDQ (input) INTEGER
78* The leading dimension of the array Q. LDQ >= max(1,NQ).
79*
80* RHO (global input/output) REAL
81* On entry, the off-diagonal element associated with the rank-1
82* cut which originally split the two submatrices which are now
83* being recombined.
84* On exit, RHO has been modified to the value required by
85* PSLAED3.
86*
87* Z (global input) REAL array, dimension (N)
88* On entry, Z contains the updating vector (the last
89* row of the first sub-eigenvector matrix and the first row of
90* the second sub-eigenvector matrix).
91* On exit, the contents of Z have been destroyed by the updating
92* process.
93*
94* DLAMDA (global output) REAL array, dimension (N)
95* A copy of the first K eigenvalues which will be used by
96* SLAED3 to form the secular equation.
97*
98* W (global output) REAL array, dimension (N)
99* The first k values of the final deflation-altered z-vector
100* which will be passed to SLAED3.
101*
102* Q2 (output) REAL array, dimension (LDQ2, NQ)
103* A copy of the first K eigenvectors which will be used by
104*
105* LDQ2 (input) INTEGER
106* The leading dimension of the array Q2.
107*
108* QBUF (workspace) REAL array, dimension 3*N
109*
110* CTOT (workspace) INTEGER array, dimension( NPCOL, 4)
111*
112* PSM (workspace) INTEGER array, dimension( NPCOL, 4)
113*
114* NPCOL (global input) INTEGER
115* The total number of columns over which the distributed
116* submatrix is distributed.
117*
118* INDX (workspace) INTEGER array, dimension (N)
119* The permutation used to sort the contents of DLAMDA into
120* ascending order.
121*
122* INDXC (output) INTEGER array, dimension (N)
123* The permutation used to arrange the columns of the deflated
124* Q matrix into three groups: the first group contains non-zero
125* elements only at and above N1, the second contains
126* non-zero elements only below N1, and the third is dense.
127*
128* INDXP (workspace) INTEGER array, dimension (N)
129* The permutation used to place deflated values of D at the end
130* of the array. INDXP(1:K) points to the nondeflated D-values
131* and INDXP(K+1:N) points to the deflated eigenvalues.
132*
133* INDCOL (workspace) INTEGER array, dimension (N)
134*
135* COLTYP (workspace/output) INTEGER array, dimension (N)
136* During execution, a label which will indicate which of the
137* following types a column in the Q2 matrix is:
138* 1 : non-zero in the upper half only;
139* 2 : dense;
140* 3 : non-zero in the lower half only;
141* 4 : deflated.
142*
143* NN (global output) INTEGER, the order of matrix U, (PSLAED1).
144* NN1 (global output) INTEGER, the order of matrix Q1, (PSLAED1).
145* NN2 (global output) INTEGER, the order of matrix Q2, (PSLAED1).
146* IB1 (global output) INTEGER, pointeur on Q1, (PSLAED1).
147* IB2 (global output) INTEGER, pointeur on Q2, (PSLAED1).
148*
149* =====================================================================
150*
151* .. Parameters ..
152 REAL MONE, ZERO, ONE, TWO, EIGHT
153 PARAMETER ( MONE = -1.0e0, zero = 0.0e0, one = 1.0e0,
154 $ two = 2.0e0, eight = 8.0e0 )
155* ..
156* .. Local Scalars ..
157 INTEGER COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
158 $ JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
159 $ NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
160 REAL C, EPS, S, T, TAU, TOL
161* ..
162* .. External Functions ..
163 INTEGER INDXG2L, INDXL2G, ISAMAX, NUMROC
164 REAL PSLAMCH, SLAPY2
165 EXTERNAL INDXG2L, INDXL2G, ISAMAX, NUMROC, PSLAMCH,
166 $ slapy2
167* ..
168* .. External Subroutines ..
169 EXTERNAL blacs_gridinfo, blacs_pinfo, infog1l, scopy,
170 $ sgerv2d, sgesd2d, slapst, srot, sscal
171* ..
172* .. Intrinsic Functions ..
173 INTRINSIC abs, max, min, mod, sqrt
174* ..
175* .. External Functions ..
176* ..
177* .. Local Arrays ..
178 INTEGER PTT( 4 )
179* ..
180* .. Executable Statements ..
181*
182* Quick return if possible
183*
184 IF( n.EQ.0 )
185 $ RETURN
186*
187 CALL blacs_pinfo( iam, nprocs )
188 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
189 np = numroc( n, nb, myrow, drow, nprow )
190*
191 n2 = n - n1
192 n1p1 = n1 + 1
193*
194 IF( rho.LT.zero ) THEN
195 CALL sscal( n2, mone, z( n1p1 ), 1 )
196 END IF
197*
198* Normalize z so that norm(z) = 1. Since z is the concatenation of
199* two normalized vectors, norm2(z) = sqrt(2).
200*
201 t = one / sqrt( two )
202 CALL sscal( n, t, z, 1 )
203*
204* RHO = ABS( norm(z)**2 * RHO )
205*
206 rho = abs( two*rho )
207*
208* Calculate the allowable deflation tolerance
209*
210 imax = isamax( n, z, 1 )
211 jmax = isamax( n, d, 1 )
212 eps = pslamch( ictxt, 'Epsilon' )
213 tol = eight*eps*max( abs( d( jmax ) ), abs( z( imax ) ) )
214*
215* If the rank-1 modifier is small enough, no more needs to be done
216* except to reorganize Q so that its columns correspond with the
217* elements in D.
218*
219 IF( rho*abs( z( imax ) ).LE.tol ) THEN
220 k = 0
221 GO TO 220
222 END IF
223*
224* If there are multiple eigenvalues then the problem deflates. Here
225* the number of equal eigenvalues are found. As each equal
226* eigenvalue is found, an elementary reflector is computed to rotate
227* the corresponding eigensubspace so that the corresponding
228* components of Z are zero in this new basis.
229*
230*
231 CALL slapst( 'I', n, d, indx, info )
232*
233 DO 10 i = 1, n1
234 coltyp( i ) = 1
235 10 CONTINUE
236 DO 20 i = n1p1, n
237 coltyp( i ) = 3
238 20 CONTINUE
239 col = dcol
240 DO 40 i = 1, n, nb
241 DO 30 j = 0, nb - 1
242 IF( i+j.LE.n )
243 $ indcol( i+j ) = col
244 30 CONTINUE
245 col = mod( col+1, npcol )
246 40 CONTINUE
247*
248 k = 0
249 k2 = n + 1
250 DO 50 j = 1, n
251 nj = indx( j )
252 IF( rho*abs( z( nj ) ).LE.tol ) THEN
253*
254* Deflate due to small z component.
255*
256 k2 = k2 - 1
257 coltyp( nj ) = 4
258 indxp( k2 ) = nj
259 IF( j.EQ.n )
260 $ GO TO 80
261 ELSE
262 pj = nj
263 GO TO 60
264 END IF
265 50 CONTINUE
266 60 CONTINUE
267 j = j + 1
268 nj = indx( j )
269 IF( j.GT.n )
270 $ GO TO 80
271 IF( rho*abs( z( nj ) ).LE.tol ) THEN
272*
273* Deflate due to small z component.
274*
275 k2 = k2 - 1
276 coltyp( nj ) = 4
277 indxp( k2 ) = nj
278 ELSE
279*
280* Check if eigenvalues are close enough to allow deflation.
281*
282 s = z( pj )
283 c = z( nj )
284*
285* Find sqrt(a**2+b**2) without overflow or
286* destructive underflow.
287*
288 tau = slapy2( c, s )
289 t = d( nj ) - d( pj )
290 c = c / tau
291 s = -s / tau
292 IF( abs( t*c*s ).LE.tol ) THEN
293*
294* Deflation is possible.
295*
296 z( nj ) = tau
297 z( pj ) = zero
298 IF( coltyp( nj ).NE.coltyp( pj ) )
299 $ coltyp( nj ) = 2
300 coltyp( pj ) = 4
301 CALL infog1l( nj, nb, npcol, mycol, dcol, njj, njcol )
302 CALL infog1l( pj, nb, npcol, mycol, dcol, pjj, pjcol )
303 IF( indcol( pj ).EQ.indcol( nj ) .AND. mycol.EQ.njcol ) THEN
304 CALL srot( np, q( 1, pjj ), 1, q( 1, njj ), 1, c, s )
305 ELSE IF( mycol.EQ.pjcol ) THEN
306 CALL sgesd2d( ictxt, np, 1, q( 1, pjj ), np, myrow,
307 $ njcol )
308 CALL sgerv2d( ictxt, np, 1, qbuf, np, myrow, njcol )
309 CALL srot( np, q( 1, pjj ), 1, qbuf, 1, c, s )
310 ELSE IF( mycol.EQ.njcol ) THEN
311 CALL sgesd2d( ictxt, np, 1, q( 1, njj ), np, myrow,
312 $ pjcol )
313 CALL sgerv2d( ictxt, np, 1, qbuf, np, myrow, pjcol )
314 CALL srot( np, qbuf, 1, q( 1, njj ), 1, c, s )
315 END IF
316 t = d( pj )*c**2 + d( nj )*s**2
317 d( nj ) = d( pj )*s**2 + d( nj )*c**2
318 d( pj ) = t
319 k2 = k2 - 1
320 i = 1
321 70 CONTINUE
322 IF( k2+i.LE.n ) THEN
323 IF( d( pj ).LT.d( indxp( k2+i ) ) ) THEN
324 indxp( k2+i-1 ) = indxp( k2+i )
325 indxp( k2+i ) = pj
326 i = i + 1
327 GO TO 70
328 ELSE
329 indxp( k2+i-1 ) = pj
330 END IF
331 ELSE
332 indxp( k2+i-1 ) = pj
333 END IF
334 pj = nj
335 ELSE
336 k = k + 1
337 dlamda( k ) = d( pj )
338 w( k ) = z( pj )
339 indxp( k ) = pj
340 pj = nj
341 END IF
342 END IF
343 GO TO 60
344 80 CONTINUE
345*
346* Record the last eigenvalue.
347*
348 k = k + 1
349 dlamda( k ) = d( pj )
350 w( k ) = z( pj )
351 indxp( k ) = pj
352*
353* Count up the total number of the various types of columns, then
354* form a permutation which positions the four column types into
355* four uniform groups (although one or more of these groups may be
356* empty).
357*
358 DO 100 j = 1, 4
359 DO 90 i = 0, npcol - 1
360 ctot( i, j ) = 0
361 90 CONTINUE
362 ptt( j ) = 0
363 100 CONTINUE
364 DO 110 j = 1, n
365 ct = coltyp( j )
366 col = indcol( j )
367 ctot( col, ct ) = ctot( col, ct ) + 1
368 110 CONTINUE
369*
370* PSM(*) = Position in SubMatrix (of types 1 through 4)
371*
372 DO 120 col = 0, npcol - 1
373 psm( col, 1 ) = 1
374 psm( col, 2 ) = 1 + ctot( col, 1 )
375 psm( col, 3 ) = psm( col, 2 ) + ctot( col, 2 )
376 psm( col, 4 ) = psm( col, 3 ) + ctot( col, 3 )
377 120 CONTINUE
378 ptt( 1 ) = 1
379 DO 140 i = 2, 4
380 ct = 0
381 DO 130 j = 0, npcol - 1
382 ct = ct + ctot( j, i-1 )
383 130 CONTINUE
384 ptt( i ) = ptt( i-1 ) + ct
385 140 CONTINUE
386*
387* Fill out the INDXC array so that the permutation which it induces
388* will place all type-1 columns first, all type-2 columns next,
389* then all type-3's, and finally all type-4's.
390*
391 DO 150 j = 1, n
392 js = indxp( j )
393 col = indcol( js )
394 ct = coltyp( js )
395 i = indxl2g( psm( col, ct ), nb, col, dcol, npcol )
396 indx( j ) = i
397 indxc( ptt( ct ) ) = i
398 psm( col, ct ) = psm( col, ct ) + 1
399 ptt( ct ) = ptt( ct ) + 1
400 150 CONTINUE
401 DO 160 j = 1, n
402 js = indxp( j )
403 jjs = indxg2l( js, nb, j, j, npcol )
404 col = indcol( js )
405 IF( col.EQ.mycol ) THEN
406 i = indx( j )
407 jjq2 = indxg2l( i, nb, j, j, npcol )
408 CALL scopy( np, q( 1, jjs ), 1, q2( 1, jjq2 ), 1 )
409 END IF
410 160 CONTINUE
411*
412*
413* The deflated eigenvalues and their corresponding vectors go back
414* into the last N - K slots of D and Q respectively.
415*
416 CALL scopy( n, d, 1, z, 1 )
417 DO 170 j = k + 1, n
418 js = indxp( j )
419 i = indx( j )
420 d( i ) = z( js )
421 170 CONTINUE
422*
423 ptt( 1 ) = 1
424 DO 190 i = 2, 4
425 ct = 0
426 DO 180 j = 0, npcol - 1
427 ct = ct + ctot( j, i-1 )
428 180 CONTINUE
429 ptt( i ) = ptt( i-1 ) + ct
430 190 CONTINUE
431*
432*
433 ib1 = indxc( 1 )
434 ie1 = ib1
435 ib2 = indxc( ptt( 2 ) )
436 ie2 = ib2
437 DO 200 i = 2, ptt( 3 ) - 1
438 ib1 = min( ib1, indxc( i ) )
439 ie1 = max( ie1, indxc( i ) )
440 200 CONTINUE
441 DO 210 i = ptt( 2 ), ptt( 4 ) - 1
442 ib2 = min( ib2, indxc( i ) )
443 ie2 = max( ie2, indxc( i ) )
444 210 CONTINUE
445 nn1 = ie1 - ib1 + 1
446 nn2 = ie2 - ib2 + 1
447 nn = max( ie1, ie2 ) - min( ib1, ib2 ) + 1
448 220 CONTINUE
449 RETURN
450*
451* End of PSLAED2
452*
453 END
subroutine infog1l(gindx, nb, nprocs, myroc, isrcproc, lindx, rocsrc)
Definition infog1l.f:3
#define max(A, B)
Definition pcgemr.c:180
#define min(A, B)
Definition pcgemr.c:181
subroutine pslaed2(ictxt, k, n, n1, nb, d, drow, dcol, q, ldq, rho, z, w, dlamda, q2, ldq2, qbuf, ctot, psm, npcol, indx, indxc, indxp, indcol, coltyp, nn, nn1, nn2, ib1, ib2)
Definition pslaed2.f:5
subroutine slapst(id, n, d, indx, info)
Definition slapst.f:2