ScaLAPACK 2.1  2.1
ScaLAPACK: Scalable Linear Algebra PACKage
pslaed2.f
Go to the documentation of this file.
1  SUBROUTINE pslaed2( ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ,
2  $ RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM,
3  $ NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN,
4  $ NN1, NN2, IB1, IB2 )
5 *
6 * -- ScaLAPACK auxiliary routine (version 1.7) --
7 * University of Tennessee, Knoxville, Oak Ridge National Laboratory,
8 * and University of California, Berkeley.
9 * December 31, 1998
10 *
11 * .. Scalar Arguments ..
12  INTEGER DCOL, DROW, IB1, IB2, ICTXT, K, LDQ, LDQ2, N,
13  $ N1, NB, NN, NN1, NN2, NPCOL
14  REAL RHO
15 * ..
16 * .. Array Arguments ..
17  INTEGER COLTYP( * ), CTOT( 0: NPCOL-1, 4 ),
18  $ INDCOL( N ), INDX( * ), INDXC( * ), INDXP( * ),
19  $ PSM( 0: NPCOL-1, 4 )
20  REAL D( * ), DLAMDA( * ), Q( LDQ, * ),
21  $ Q2( LDQ2, * ), QBUF( * ), W( * ), Z( * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * PSLAED2 sorts the two sets of eigenvalues together into a single
28 * sorted set. Then it tries to deflate the size of the problem.
29 * There are two ways in which deflation can occur: when two or more
30 * eigenvalues are close together or if there is a tiny entry in the
31 * Z vector. For each such occurrence the order of the related secular
32 * equation problem is reduced by one.
33 *
34 * Arguments
35 * =========
36 *
37 * ICTXT (global input) INTEGER
38 * The BLACS context handle, indicating the global context of
39 * the operation on the matrix. The context itself is global.
40 *
41 * K (output) INTEGER
42 * The number of non-deflated eigenvalues, and the order of the
43 * related secular equation. 0 <= K <=N.
44 *
45 * N (input) INTEGER
46 * The dimension of the symmetric tridiagonal matrix. N >= 0.
47 *
48 * N1 (input) INTEGER
49 * The location of the last eigenvalue in the leading sub-matrix.
50 * min(1,N) < N1 < N.
51 *
52 * NB (global input) INTEGER
53 * The blocking factor used to distribute the columns of the
54 * matrix. NB >= 1.
55 *
56 * D (input/output) REAL array, dimension (N)
57 * On entry, D contains the eigenvalues of the two submatrices to
58 * be combined.
59 * On exit, D contains the trailing (N-K) updated eigenvalues
60 * (those which were deflated) sorted into increasing order.
61 *
62 * DROW (global input) INTEGER
63 * The process row over which the first row of the matrix D is
64 * distributed. 0 <= DROW < NPROW.
65 *
66 * DCOL (global input) INTEGER
67 * The process column over which the first column of the
68 * matrix D is distributed. 0 <= DCOL < NPCOL.
69 *
70 * Q (input/output) REAL array, dimension (LDQ, N)
71 * On entry, Q contains the eigenvectors of two submatrices in
72 * the two square blocks with corners at (1,1), (N1,N1)
73 * and (N1+1, N1+1), (N,N).
74 * On exit, Q contains the trailing (N-K) updated eigenvectors
75 * (those which were deflated) in its last N-K columns.
76 *
77 * LDQ (input) INTEGER
78 * The leading dimension of the array Q. LDQ >= max(1,NQ).
79 *
80 * RHO (global input/output) REAL
81 * On entry, the off-diagonal element associated with the rank-1
82 * cut which originally split the two submatrices which are now
83 * being recombined.
84 * On exit, RHO has been modified to the value required by
85 * PSLAED3.
86 *
87 * Z (global input) REAL array, dimension (N)
88 * On entry, Z contains the updating vector (the last
89 * row of the first sub-eigenvector matrix and the first row of
90 * the second sub-eigenvector matrix).
91 * On exit, the contents of Z have been destroyed by the updating
92 * process.
93 *
94 * DLAMDA (global output) REAL array, dimension (N)
95 * A copy of the first K eigenvalues which will be used by
96 * SLAED3 to form the secular equation.
97 *
98 * W (global output) REAL array, dimension (N)
99 * The first k values of the final deflation-altered z-vector
100 * which will be passed to SLAED3.
101 *
102 * Q2 (output) REAL array, dimension (LDQ2, NQ)
103 * A copy of the first K eigenvectors which will be used by
104 *
105 * LDQ2 (input) INTEGER
106 * The leading dimension of the array Q2.
107 *
108 * QBUF (workspace) REAL array, dimension 3*N
109 *
110 * CTOT (workspace) INTEGER array, dimension( NPCOL, 4)
111 *
112 * PSM (workspace) INTEGER array, dimension( NPCOL, 4)
113 *
114 * NPCOL (global input) INTEGER
115 * The total number of columns over which the distributed
116 * submatrix is distributed.
117 *
118 * INDX (workspace) INTEGER array, dimension (N)
119 * The permutation used to sort the contents of DLAMDA into
120 * ascending order.
121 *
122 * INDXC (output) INTEGER array, dimension (N)
123 * The permutation used to arrange the columns of the deflated
124 * Q matrix into three groups: the first group contains non-zero
125 * elements only at and above N1, the second contains
126 * non-zero elements only below N1, and the third is dense.
127 *
128 * INDXP (workspace) INTEGER array, dimension (N)
129 * The permutation used to place deflated values of D at the end
130 * of the array. INDXP(1:K) points to the nondeflated D-values
131 * and INDXP(K+1:N) points to the deflated eigenvalues.
132 *
133 * INDCOL (workspace) INTEGER array, dimension (N)
134 *
135 * COLTYP (workspace/output) INTEGER array, dimension (N)
136 * During execution, a label which will indicate which of the
137 * following types a column in the Q2 matrix is:
138 * 1 : non-zero in the upper half only;
139 * 2 : dense;
140 * 3 : non-zero in the lower half only;
141 * 4 : deflated.
142 *
143 * NN (global output) INTEGER, the order of matrix U, (PSLAED1).
144 * NN1 (global output) INTEGER, the order of matrix Q1, (PSLAED1).
145 * NN2 (global output) INTEGER, the order of matrix Q2, (PSLAED1).
146 * IB1 (global output) INTEGER, pointeur on Q1, (PSLAED1).
147 * IB2 (global output) INTEGER, pointeur on Q2, (PSLAED1).
148 *
149 * =====================================================================
150 *
151 * .. Parameters ..
152  REAL MONE, ZERO, ONE, TWO, EIGHT
153  PARAMETER ( MONE = -1.0e0, zero = 0.0e0, one = 1.0e0,
154  $ two = 2.0e0, eight = 8.0e0 )
155 * ..
156 * .. Local Scalars ..
157  INTEGER COL, CT, I, IAM, IE1, IE2, IMAX, INFO, J, JJQ2,
158  $ JJS, JMAX, JS, K2, MYCOL, MYROW, N1P1, N2, NJ,
159  $ NJCOL, NJJ, NP, NPROCS, NPROW, PJ, PJCOL, PJJ
160  REAL C, EPS, S, T, TAU, TOL
161 * ..
162 * .. External Functions ..
163  INTEGER INDXG2L, INDXL2G, ISAMAX, NUMROC
164  REAL PSLAMCH, SLAPY2
165  EXTERNAL INDXG2L, INDXL2G, ISAMAX, NUMROC, PSLAMCH,
166  $ slapy2
167 * ..
168 * .. External Subroutines ..
169  EXTERNAL blacs_gridinfo, blacs_pinfo, infog1l, scopy,
170  $ sgerv2d, sgesd2d, slapst, srot, sscal
171 * ..
172 * .. Intrinsic Functions ..
173  INTRINSIC abs, max, min, mod, sqrt
174 * ..
175 * .. External Functions ..
176 * ..
177 * .. Local Arrays ..
178  INTEGER PTT( 4 )
179 * ..
180 * .. Executable Statements ..
181 *
182 * Quick return if possible
183 *
184  IF( n.EQ.0 )
185  $ RETURN
186 *
187  CALL blacs_pinfo( iam, nprocs )
188  CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
189  np = numroc( n, nb, myrow, drow, nprow )
190 *
191  n2 = n - n1
192  n1p1 = n1 + 1
193 *
194  IF( rho.LT.zero ) THEN
195  CALL sscal( n2, mone, z( n1p1 ), 1 )
196  END IF
197 *
198 * Normalize z so that norm(z) = 1. Since z is the concatenation of
199 * two normalized vectors, norm2(z) = sqrt(2).
200 *
201  t = one / sqrt( two )
202  CALL sscal( n, t, z, 1 )
203 *
204 * RHO = ABS( norm(z)**2 * RHO )
205 *
206  rho = abs( two*rho )
207 *
208 * Calculate the allowable deflation tolerance
209 *
210  imax = isamax( n, z, 1 )
211  jmax = isamax( n, d, 1 )
212  eps = pslamch( ictxt, 'Epsilon' )
213  tol = eight*eps*max( abs( d( jmax ) ), abs( z( imax ) ) )
214 *
215 * If the rank-1 modifier is small enough, no more needs to be done
216 * except to reorganize Q so that its columns correspond with the
217 * elements in D.
218 *
219  IF( rho*abs( z( imax ) ).LE.tol ) THEN
220  k = 0
221  GO TO 220
222  END IF
223 *
224 * If there are multiple eigenvalues then the problem deflates. Here
225 * the number of equal eigenvalues are found. As each equal
226 * eigenvalue is found, an elementary reflector is computed to rotate
227 * the corresponding eigensubspace so that the corresponding
228 * components of Z are zero in this new basis.
229 *
230 *
231  CALL slapst( 'I', n, d, indx, info )
232 *
233  DO 10 i = 1, n1
234  coltyp( i ) = 1
235  10 CONTINUE
236  DO 20 i = n1p1, n
237  coltyp( i ) = 3
238  20 CONTINUE
239  col = dcol
240  DO 40 i = 1, n, nb
241  DO 30 j = 0, nb - 1
242  IF( i+j.LE.n )
243  $ indcol( i+j ) = col
244  30 CONTINUE
245  col = mod( col+1, npcol )
246  40 CONTINUE
247 *
248  k = 0
249  k2 = n + 1
250  DO 50 j = 1, n
251  nj = indx( j )
252  IF( rho*abs( z( nj ) ).LE.tol ) THEN
253 *
254 * Deflate due to small z component.
255 *
256  k2 = k2 - 1
257  coltyp( nj ) = 4
258  indxp( k2 ) = nj
259  IF( j.EQ.n )
260  $ GO TO 80
261  ELSE
262  pj = nj
263  GO TO 60
264  END IF
265  50 CONTINUE
266  60 CONTINUE
267  j = j + 1
268  nj = indx( j )
269  IF( j.GT.n )
270  $ GO TO 80
271  IF( rho*abs( z( nj ) ).LE.tol ) THEN
272 *
273 * Deflate due to small z component.
274 *
275  k2 = k2 - 1
276  coltyp( nj ) = 4
277  indxp( k2 ) = nj
278  ELSE
279 *
280 * Check if eigenvalues are close enough to allow deflation.
281 *
282  s = z( pj )
283  c = z( nj )
284 *
285 * Find sqrt(a**2+b**2) without overflow or
286 * destructive underflow.
287 *
288  tau = slapy2( c, s )
289  t = d( nj ) - d( pj )
290  c = c / tau
291  s = -s / tau
292  IF( abs( t*c*s ).LE.tol ) THEN
293 *
294 * Deflation is possible.
295 *
296  z( nj ) = tau
297  z( pj ) = zero
298  IF( coltyp( nj ).NE.coltyp( pj ) )
299  $ coltyp( nj ) = 2
300  coltyp( pj ) = 4
301  CALL infog1l( nj, nb, npcol, mycol, dcol, njj, njcol )
302  CALL infog1l( pj, nb, npcol, mycol, dcol, pjj, pjcol )
303  IF( indcol( pj ).EQ.indcol( nj ) .AND. mycol.EQ.njcol ) THEN
304  CALL srot( np, q( 1, pjj ), 1, q( 1, njj ), 1, c, s )
305  ELSE IF( mycol.EQ.pjcol ) THEN
306  CALL sgesd2d( ictxt, np, 1, q( 1, pjj ), np, myrow,
307  $ njcol )
308  CALL sgerv2d( ictxt, np, 1, qbuf, np, myrow, njcol )
309  CALL srot( np, q( 1, pjj ), 1, qbuf, 1, c, s )
310  ELSE IF( mycol.EQ.njcol ) THEN
311  CALL sgesd2d( ictxt, np, 1, q( 1, njj ), np, myrow,
312  $ pjcol )
313  CALL sgerv2d( ictxt, np, 1, qbuf, np, myrow, pjcol )
314  CALL srot( np, qbuf, 1, q( 1, njj ), 1, c, s )
315  END IF
316  t = d( pj )*c**2 + d( nj )*s**2
317  d( nj ) = d( pj )*s**2 + d( nj )*c**2
318  d( pj ) = t
319  k2 = k2 - 1
320  i = 1
321  70 CONTINUE
322  IF( k2+i.LE.n ) THEN
323  IF( d( pj ).LT.d( indxp( k2+i ) ) ) THEN
324  indxp( k2+i-1 ) = indxp( k2+i )
325  indxp( k2+i ) = pj
326  i = i + 1
327  GO TO 70
328  ELSE
329  indxp( k2+i-1 ) = pj
330  END IF
331  ELSE
332  indxp( k2+i-1 ) = pj
333  END IF
334  pj = nj
335  ELSE
336  k = k + 1
337  dlamda( k ) = d( pj )
338  w( k ) = z( pj )
339  indxp( k ) = pj
340  pj = nj
341  END IF
342  END IF
343  GO TO 60
344  80 CONTINUE
345 *
346 * Record the last eigenvalue.
347 *
348  k = k + 1
349  dlamda( k ) = d( pj )
350  w( k ) = z( pj )
351  indxp( k ) = pj
352 *
353 * Count up the total number of the various types of columns, then
354 * form a permutation which positions the four column types into
355 * four uniform groups (although one or more of these groups may be
356 * empty).
357 *
358  DO 100 j = 1, 4
359  DO 90 i = 0, npcol - 1
360  ctot( i, j ) = 0
361  90 CONTINUE
362  ptt( j ) = 0
363  100 CONTINUE
364  DO 110 j = 1, n
365  ct = coltyp( j )
366  col = indcol( j )
367  ctot( col, ct ) = ctot( col, ct ) + 1
368  110 CONTINUE
369 *
370 * PSM(*) = Position in SubMatrix (of types 1 through 4)
371 *
372  DO 120 col = 0, npcol - 1
373  psm( col, 1 ) = 1
374  psm( col, 2 ) = 1 + ctot( col, 1 )
375  psm( col, 3 ) = psm( col, 2 ) + ctot( col, 2 )
376  psm( col, 4 ) = psm( col, 3 ) + ctot( col, 3 )
377  120 CONTINUE
378  ptt( 1 ) = 1
379  DO 140 i = 2, 4
380  ct = 0
381  DO 130 j = 0, npcol - 1
382  ct = ct + ctot( j, i-1 )
383  130 CONTINUE
384  ptt( i ) = ptt( i-1 ) + ct
385  140 CONTINUE
386 *
387 * Fill out the INDXC array so that the permutation which it induces
388 * will place all type-1 columns first, all type-2 columns next,
389 * then all type-3's, and finally all type-4's.
390 *
391  DO 150 j = 1, n
392  js = indxp( j )
393  col = indcol( js )
394  ct = coltyp( js )
395  i = indxl2g( psm( col, ct ), nb, col, dcol, npcol )
396  indx( j ) = i
397  indxc( ptt( ct ) ) = i
398  psm( col, ct ) = psm( col, ct ) + 1
399  ptt( ct ) = ptt( ct ) + 1
400  150 CONTINUE
401  DO 160 j = 1, n
402  js = indxp( j )
403  jjs = indxg2l( js, nb, j, j, npcol )
404  col = indcol( js )
405  IF( col.EQ.mycol ) THEN
406  i = indx( j )
407  jjq2 = indxg2l( i, nb, j, j, npcol )
408  CALL scopy( np, q( 1, jjs ), 1, q2( 1, jjq2 ), 1 )
409  END IF
410  160 CONTINUE
411 *
412 *
413 * The deflated eigenvalues and their corresponding vectors go back
414 * into the last N - K slots of D and Q respectively.
415 *
416  CALL scopy( n, d, 1, z, 1 )
417  DO 170 j = k + 1, n
418  js = indxp( j )
419  i = indx( j )
420  d( i ) = z( js )
421  170 CONTINUE
422 *
423  ptt( 1 ) = 1
424  DO 190 i = 2, 4
425  ct = 0
426  DO 180 j = 0, npcol - 1
427  ct = ct + ctot( j, i-1 )
428  180 CONTINUE
429  ptt( i ) = ptt( i-1 ) + ct
430  190 CONTINUE
431 *
432 *
433  ib1 = indxc( 1 )
434  ie1 = ib1
435  ib2 = indxc( ptt( 2 ) )
436  ie2 = ib2
437  DO 200 i = 2, ptt( 3 ) - 1
438  ib1 = min( ib1, indxc( i ) )
439  ie1 = max( ie1, indxc( i ) )
440  200 CONTINUE
441  DO 210 i = ptt( 2 ), ptt( 4 ) - 1
442  ib2 = min( ib2, indxc( i ) )
443  ie2 = max( ie2, indxc( i ) )
444  210 CONTINUE
445  nn1 = ie1 - ib1 + 1
446  nn2 = ie2 - ib2 + 1
447  nn = max( ie1, ie2 ) - min( ib1, ib2 ) + 1
448  220 CONTINUE
449  RETURN
450 *
451 * End of PSLAED2
452 *
453  END
max
#define max(A, B)
Definition: pcgemr.c:180
infog1l
subroutine infog1l(GINDX, NB, NPROCS, MYROC, ISRCPROC, LINDX, ROCSRC)
Definition: infog1l.f:3
slapst
subroutine slapst(ID, N, D, INDX, INFO)
Definition: slapst.f:2
min
#define min(A, B)
Definition: pcgemr.c:181
pslaed2
subroutine pslaed2(ICTXT, K, N, N1, NB, D, DROW, DCOL, Q, LDQ, RHO, Z, W, DLAMDA, Q2, LDQ2, QBUF, CTOT, PSM, NPCOL, INDX, INDXC, INDXP, INDCOL, COLTYP, NN, NN1, NN2, IB1, IB2)
Definition: pslaed2.f:5