LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
slasd2.f
Go to the documentation of this file.
1 *> \brief \b SLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SLASD2 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasd2.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasd2.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasd2.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
22 * LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
23 * IDXC, IDXQ, COLTYP, INFO )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
27 * REAL ALPHA, BETA
28 * ..
29 * .. Array Arguments ..
30 * INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
31 * $ IDXQ( * )
32 * REAL D( * ), DSIGMA( * ), U( LDU, * ),
33 * $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
34 * $ Z( * )
35 * ..
36 *
37 *
38 *> \par Purpose:
39 * =============
40 *>
41 *> \verbatim
42 *>
43 *> SLASD2 merges the two sets of singular values together into a single
44 *> sorted set. Then it tries to deflate the size of the problem.
45 *> There are two ways in which deflation can occur: when two or more
46 *> singular values are close together or if there is a tiny entry in the
47 *> Z vector. For each such occurrence the order of the related secular
48 *> equation problem is reduced by one.
49 *>
50 *> SLASD2 is called from SLASD1.
51 *> \endverbatim
52 *
53 * Arguments:
54 * ==========
55 *
56 *> \param[in] NL
57 *> \verbatim
58 *> NL is INTEGER
59 *> The row dimension of the upper block. NL >= 1.
60 *> \endverbatim
61 *>
62 *> \param[in] NR
63 *> \verbatim
64 *> NR is INTEGER
65 *> The row dimension of the lower block. NR >= 1.
66 *> \endverbatim
67 *>
68 *> \param[in] SQRE
69 *> \verbatim
70 *> SQRE is INTEGER
71 *> = 0: the lower block is an NR-by-NR square matrix.
72 *> = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
73 *>
74 *> The bidiagonal matrix has N = NL + NR + 1 rows and
75 *> M = N + SQRE >= N columns.
76 *> \endverbatim
77 *>
78 *> \param[out] K
79 *> \verbatim
80 *> K is INTEGER
81 *> Contains the dimension of the non-deflated matrix,
82 *> This is the order of the related secular equation. 1 <= K <=N.
83 *> \endverbatim
84 *>
85 *> \param[in,out] D
86 *> \verbatim
87 *> D is REAL array, dimension (N)
88 *> On entry D contains the singular values of the two submatrices
89 *> to be combined. On exit D contains the trailing (N-K) updated
90 *> singular values (those which were deflated) sorted into
91 *> increasing order.
92 *> \endverbatim
93 *>
94 *> \param[out] Z
95 *> \verbatim
96 *> Z is REAL array, dimension (N)
97 *> On exit Z contains the updating row vector in the secular
98 *> equation.
99 *> \endverbatim
100 *>
101 *> \param[in] ALPHA
102 *> \verbatim
103 *> ALPHA is REAL
104 *> Contains the diagonal element associated with the added row.
105 *> \endverbatim
106 *>
107 *> \param[in] BETA
108 *> \verbatim
109 *> BETA is REAL
110 *> Contains the off-diagonal element associated with the added
111 *> row.
112 *> \endverbatim
113 *>
114 *> \param[in,out] U
115 *> \verbatim
116 *> U is REAL array, dimension (LDU,N)
117 *> On entry U contains the left singular vectors of two
118 *> submatrices in the two square blocks with corners at (1,1),
119 *> (NL, NL), and (NL+2, NL+2), (N,N).
120 *> On exit U contains the trailing (N-K) updated left singular
121 *> vectors (those which were deflated) in its last N-K columns.
122 *> \endverbatim
123 *>
124 *> \param[in] LDU
125 *> \verbatim
126 *> LDU is INTEGER
127 *> The leading dimension of the array U. LDU >= N.
128 *> \endverbatim
129 *>
130 *> \param[in,out] VT
131 *> \verbatim
132 *> VT is REAL array, dimension (LDVT,M)
133 *> On entry VT**T contains the right singular vectors of two
134 *> submatrices in the two square blocks with corners at (1,1),
135 *> (NL+1, NL+1), and (NL+2, NL+2), (M,M).
136 *> On exit VT**T contains the trailing (N-K) updated right singular
137 *> vectors (those which were deflated) in its last N-K columns.
138 *> In case SQRE =1, the last row of VT spans the right null
139 *> space.
140 *> \endverbatim
141 *>
142 *> \param[in] LDVT
143 *> \verbatim
144 *> LDVT is INTEGER
145 *> The leading dimension of the array VT. LDVT >= M.
146 *> \endverbatim
147 *>
148 *> \param[out] DSIGMA
149 *> \verbatim
150 *> DSIGMA is REAL array, dimension (N)
151 *> Contains a copy of the diagonal elements (K-1 singular values
152 *> and one zero) in the secular equation.
153 *> \endverbatim
154 *>
155 *> \param[out] U2
156 *> \verbatim
157 *> U2 is REAL array, dimension (LDU2,N)
158 *> Contains a copy of the first K-1 left singular vectors which
159 *> will be used by SLASD3 in a matrix multiply (SGEMM) to solve
160 *> for the new left singular vectors. U2 is arranged into four
161 *> blocks. The first block contains a column with 1 at NL+1 and
162 *> zero everywhere else; the second block contains non-zero
163 *> entries only at and above NL; the third contains non-zero
164 *> entries only below NL+1; and the fourth is dense.
165 *> \endverbatim
166 *>
167 *> \param[in] LDU2
168 *> \verbatim
169 *> LDU2 is INTEGER
170 *> The leading dimension of the array U2. LDU2 >= N.
171 *> \endverbatim
172 *>
173 *> \param[out] VT2
174 *> \verbatim
175 *> VT2 is REAL array, dimension (LDVT2,N)
176 *> VT2**T contains a copy of the first K right singular vectors
177 *> which will be used by SLASD3 in a matrix multiply (SGEMM) to
178 *> solve for the new right singular vectors. VT2 is arranged into
179 *> three blocks. The first block contains a row that corresponds
180 *> to the special 0 diagonal element in SIGMA; the second block
181 *> contains non-zeros only at and before NL +1; the third block
182 *> contains non-zeros only at and after NL +2.
183 *> \endverbatim
184 *>
185 *> \param[in] LDVT2
186 *> \verbatim
187 *> LDVT2 is INTEGER
188 *> The leading dimension of the array VT2. LDVT2 >= M.
189 *> \endverbatim
190 *>
191 *> \param[out] IDXP
192 *> \verbatim
193 *> IDXP is INTEGER array, dimension (N)
194 *> This will contain the permutation used to place deflated
195 *> values of D at the end of the array. On output IDXP(2:K)
196 *> points to the nondeflated D-values and IDXP(K+1:N)
197 *> points to the deflated singular values.
198 *> \endverbatim
199 *>
200 *> \param[out] IDX
201 *> \verbatim
202 *> IDX is INTEGER array, dimension (N)
203 *> This will contain the permutation used to sort the contents of
204 *> D into ascending order.
205 *> \endverbatim
206 *>
207 *> \param[out] IDXC
208 *> \verbatim
209 *> IDXC is INTEGER array, dimension (N)
210 *> This will contain the permutation used to arrange the columns
211 *> of the deflated U matrix into three groups: the first group
212 *> contains non-zero entries only at and above NL, the second
213 *> contains non-zero entries only below NL+2, and the third is
214 *> dense.
215 *> \endverbatim
216 *>
217 *> \param[in,out] IDXQ
218 *> \verbatim
219 *> IDXQ is INTEGER array, dimension (N)
220 *> This contains the permutation which separately sorts the two
221 *> sub-problems in D into ascending order. Note that entries in
222 *> the first hlaf of this permutation must first be moved one
223 *> position backward; and entries in the second half
224 *> must first have NL+1 added to their values.
225 *> \endverbatim
226 *>
227 *> \param[out] COLTYP
228 *> \verbatim
229 *> COLTYP is INTEGER array, dimension (N)
230 *> As workspace, this will contain a label which will indicate
231 *> which of the following types a column in the U2 matrix or a
232 *> row in the VT2 matrix is:
233 *> 1 : non-zero in the upper half only
234 *> 2 : non-zero in the lower half only
235 *> 3 : dense
236 *> 4 : deflated
237 *>
238 *> On exit, it is an array of dimension 4, with COLTYP(I) being
239 *> the dimension of the I-th type columns.
240 *> \endverbatim
241 *>
242 *> \param[out] INFO
243 *> \verbatim
244 *> INFO is INTEGER
245 *> = 0: successful exit.
246 *> < 0: if INFO = -i, the i-th argument had an illegal value.
247 *> \endverbatim
248 *
249 * Authors:
250 * ========
251 *
252 *> \author Univ. of Tennessee
253 *> \author Univ. of California Berkeley
254 *> \author Univ. of Colorado Denver
255 *> \author NAG Ltd.
256 *
257 *> \date September 2012
258 *
259 *> \ingroup auxOTHERauxiliary
260 *
261 *> \par Contributors:
262 * ==================
263 *>
264 *> Ming Gu and Huan Ren, Computer Science Division, University of
265 *> California at Berkeley, USA
266 *>
267 * =====================================================================
268  SUBROUTINE slasd2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
269  $ ldvt, dsigma, u2, ldu2, vt2, ldvt2, idxp, idx,
270  $ idxc, idxq, coltyp, info )
271 *
272 * -- LAPACK auxiliary routine (version 3.4.2) --
273 * -- LAPACK is a software package provided by Univ. of Tennessee, --
274 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
275 * September 2012
276 *
277 * .. Scalar Arguments ..
278  INTEGER info, k, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
279  REAL alpha, beta
280 * ..
281 * .. Array Arguments ..
282  INTEGER coltyp( * ), idx( * ), idxc( * ), idxp( * ),
283  $ idxq( * )
284  REAL d( * ), dsigma( * ), u( ldu, * ),
285  $ u2( ldu2, * ), vt( ldvt, * ), vt2( ldvt2, * ),
286  $ z( * )
287 * ..
288 *
289 * =====================================================================
290 *
291 * .. Parameters ..
292  REAL zero, one, two, eight
293  parameter( zero = 0.0e+0, one = 1.0e+0, two = 2.0e+0,
294  $ eight = 8.0e+0 )
295 * ..
296 * .. Local Arrays ..
297  INTEGER ctot( 4 ), psm( 4 )
298 * ..
299 * .. Local Scalars ..
300  INTEGER ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m,
301  $ n, nlp1, nlp2
302  REAL c, eps, hlftol, s, tau, tol, z1
303 * ..
304 * .. External Functions ..
305  REAL slamch, slapy2
306  EXTERNAL slamch, slapy2
307 * ..
308 * .. External Subroutines ..
309  EXTERNAL scopy, slacpy, slamrg, slaset, srot, xerbla
310 * ..
311 * .. Intrinsic Functions ..
312  INTRINSIC abs, max
313 * ..
314 * .. Executable Statements ..
315 *
316 * Test the input parameters.
317 *
318  info = 0
319 *
320  IF( nl.LT.1 ) THEN
321  info = -1
322  ELSE IF( nr.LT.1 ) THEN
323  info = -2
324  ELSE IF( ( sqre.NE.1 ) .AND. ( sqre.NE.0 ) ) THEN
325  info = -3
326  END IF
327 *
328  n = nl + nr + 1
329  m = n + sqre
330 *
331  IF( ldu.LT.n ) THEN
332  info = -10
333  ELSE IF( ldvt.LT.m ) THEN
334  info = -12
335  ELSE IF( ldu2.LT.n ) THEN
336  info = -15
337  ELSE IF( ldvt2.LT.m ) THEN
338  info = -17
339  END IF
340  IF( info.NE.0 ) THEN
341  CALL xerbla( 'SLASD2', -info )
342  return
343  END IF
344 *
345  nlp1 = nl + 1
346  nlp2 = nl + 2
347 *
348 * Generate the first part of the vector Z; and move the singular
349 * values in the first part of D one position backward.
350 *
351  z1 = alpha*vt( nlp1, nlp1 )
352  z( 1 ) = z1
353  DO 10 i = nl, 1, -1
354  z( i+1 ) = alpha*vt( i, nlp1 )
355  d( i+1 ) = d( i )
356  idxq( i+1 ) = idxq( i ) + 1
357  10 continue
358 *
359 * Generate the second part of the vector Z.
360 *
361  DO 20 i = nlp2, m
362  z( i ) = beta*vt( i, nlp2 )
363  20 continue
364 *
365 * Initialize some reference arrays.
366 *
367  DO 30 i = 2, nlp1
368  coltyp( i ) = 1
369  30 continue
370  DO 40 i = nlp2, n
371  coltyp( i ) = 2
372  40 continue
373 *
374 * Sort the singular values into increasing order
375 *
376  DO 50 i = nlp2, n
377  idxq( i ) = idxq( i ) + nlp1
378  50 continue
379 *
380 * DSIGMA, IDXC, IDXC, and the first column of U2
381 * are used as storage space.
382 *
383  DO 60 i = 2, n
384  dsigma( i ) = d( idxq( i ) )
385  u2( i, 1 ) = z( idxq( i ) )
386  idxc( i ) = coltyp( idxq( i ) )
387  60 continue
388 *
389  CALL slamrg( nl, nr, dsigma( 2 ), 1, 1, idx( 2 ) )
390 *
391  DO 70 i = 2, n
392  idxi = 1 + idx( i )
393  d( i ) = dsigma( idxi )
394  z( i ) = u2( idxi, 1 )
395  coltyp( i ) = idxc( idxi )
396  70 continue
397 *
398 * Calculate the allowable deflation tolerance
399 *
400  eps = slamch( 'Epsilon' )
401  tol = max( abs( alpha ), abs( beta ) )
402  tol = eight*eps*max( abs( d( n ) ), tol )
403 *
404 * There are 2 kinds of deflation -- first a value in the z-vector
405 * is small, second two (or more) singular values are very close
406 * together (their difference is small).
407 *
408 * If the value in the z-vector is small, we simply permute the
409 * array so that the corresponding singular value is moved to the
410 * end.
411 *
412 * If two values in the D-vector are close, we perform a two-sided
413 * rotation designed to make one of the corresponding z-vector
414 * entries zero, and then permute the array so that the deflated
415 * singular value is moved to the end.
416 *
417 * If there are multiple singular values then the problem deflates.
418 * Here the number of equal singular values are found. As each equal
419 * singular value is found, an elementary reflector is computed to
420 * rotate the corresponding singular subspace so that the
421 * corresponding components of Z are zero in this new basis.
422 *
423  k = 1
424  k2 = n + 1
425  DO 80 j = 2, n
426  IF( abs( z( j ) ).LE.tol ) THEN
427 *
428 * Deflate due to small z component.
429 *
430  k2 = k2 - 1
431  idxp( k2 ) = j
432  coltyp( j ) = 4
433  IF( j.EQ.n )
434  $ go to 120
435  ELSE
436  jprev = j
437  go to 90
438  END IF
439  80 continue
440  90 continue
441  j = jprev
442  100 continue
443  j = j + 1
444  IF( j.GT.n )
445  $ go to 110
446  IF( abs( z( j ) ).LE.tol ) THEN
447 *
448 * Deflate due to small z component.
449 *
450  k2 = k2 - 1
451  idxp( k2 ) = j
452  coltyp( j ) = 4
453  ELSE
454 *
455 * Check if singular values are close enough to allow deflation.
456 *
457  IF( abs( d( j )-d( jprev ) ).LE.tol ) THEN
458 *
459 * Deflation is possible.
460 *
461  s = z( jprev )
462  c = z( j )
463 *
464 * Find sqrt(a**2+b**2) without overflow or
465 * destructive underflow.
466 *
467  tau = slapy2( c, s )
468  c = c / tau
469  s = -s / tau
470  z( j ) = tau
471  z( jprev ) = zero
472 *
473 * Apply back the Givens rotation to the left and right
474 * singular vector matrices.
475 *
476  idxjp = idxq( idx( jprev )+1 )
477  idxj = idxq( idx( j )+1 )
478  IF( idxjp.LE.nlp1 ) THEN
479  idxjp = idxjp - 1
480  END IF
481  IF( idxj.LE.nlp1 ) THEN
482  idxj = idxj - 1
483  END IF
484  CALL srot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s )
485  CALL srot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt, c,
486  $ s )
487  IF( coltyp( j ).NE.coltyp( jprev ) ) THEN
488  coltyp( j ) = 3
489  END IF
490  coltyp( jprev ) = 4
491  k2 = k2 - 1
492  idxp( k2 ) = jprev
493  jprev = j
494  ELSE
495  k = k + 1
496  u2( k, 1 ) = z( jprev )
497  dsigma( k ) = d( jprev )
498  idxp( k ) = jprev
499  jprev = j
500  END IF
501  END IF
502  go to 100
503  110 continue
504 *
505 * Record the last singular value.
506 *
507  k = k + 1
508  u2( k, 1 ) = z( jprev )
509  dsigma( k ) = d( jprev )
510  idxp( k ) = jprev
511 *
512  120 continue
513 *
514 * Count up the total number of the various types of columns, then
515 * form a permutation which positions the four column types into
516 * four groups of uniform structure (although one or more of these
517 * groups may be empty).
518 *
519  DO 130 j = 1, 4
520  ctot( j ) = 0
521  130 continue
522  DO 140 j = 2, n
523  ct = coltyp( j )
524  ctot( ct ) = ctot( ct ) + 1
525  140 continue
526 *
527 * PSM(*) = Position in SubMatrix (of types 1 through 4)
528 *
529  psm( 1 ) = 2
530  psm( 2 ) = 2 + ctot( 1 )
531  psm( 3 ) = psm( 2 ) + ctot( 2 )
532  psm( 4 ) = psm( 3 ) + ctot( 3 )
533 *
534 * Fill out the IDXC array so that the permutation which it induces
535 * will place all type-1 columns first, all type-2 columns next,
536 * then all type-3's, and finally all type-4's, starting from the
537 * second column. This applies similarly to the rows of VT.
538 *
539  DO 150 j = 2, n
540  jp = idxp( j )
541  ct = coltyp( jp )
542  idxc( psm( ct ) ) = j
543  psm( ct ) = psm( ct ) + 1
544  150 continue
545 *
546 * Sort the singular values and corresponding singular vectors into
547 * DSIGMA, U2, and VT2 respectively. The singular values/vectors
548 * which were not deflated go into the first K slots of DSIGMA, U2,
549 * and VT2 respectively, while those which were deflated go into the
550 * last N - K slots, except that the first column/row will be treated
551 * separately.
552 *
553  DO 160 j = 2, n
554  jp = idxp( j )
555  dsigma( j ) = d( jp )
556  idxj = idxq( idx( idxp( idxc( j ) ) )+1 )
557  IF( idxj.LE.nlp1 ) THEN
558  idxj = idxj - 1
559  END IF
560  CALL scopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 )
561  CALL scopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 )
562  160 continue
563 *
564 * Determine DSIGMA(1), DSIGMA(2) and Z(1)
565 *
566  dsigma( 1 ) = zero
567  hlftol = tol / two
568  IF( abs( dsigma( 2 ) ).LE.hlftol )
569  $ dsigma( 2 ) = hlftol
570  IF( m.GT.n ) THEN
571  z( 1 ) = slapy2( z1, z( m ) )
572  IF( z( 1 ).LE.tol ) THEN
573  c = one
574  s = zero
575  z( 1 ) = tol
576  ELSE
577  c = z1 / z( 1 )
578  s = z( m ) / z( 1 )
579  END IF
580  ELSE
581  IF( abs( z1 ).LE.tol ) THEN
582  z( 1 ) = tol
583  ELSE
584  z( 1 ) = z1
585  END IF
586  END IF
587 *
588 * Move the rest of the updating row to Z.
589 *
590  CALL scopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 )
591 *
592 * Determine the first column of U2, the first row of VT2 and the
593 * last row of VT.
594 *
595  CALL slaset( 'A', n, 1, zero, zero, u2, ldu2 )
596  u2( nlp1, 1 ) = one
597  IF( m.GT.n ) THEN
598  DO 170 i = 1, nlp1
599  vt( m, i ) = -s*vt( nlp1, i )
600  vt2( 1, i ) = c*vt( nlp1, i )
601  170 continue
602  DO 180 i = nlp2, m
603  vt2( 1, i ) = s*vt( m, i )
604  vt( m, i ) = c*vt( m, i )
605  180 continue
606  ELSE
607  CALL scopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 )
608  END IF
609  IF( m.GT.n ) THEN
610  CALL scopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 )
611  END IF
612 *
613 * The deflated singular values and their corresponding vectors go
614 * into the back of D, U, and V respectively.
615 *
616  IF( n.GT.k ) THEN
617  CALL scopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 )
618  CALL slacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),
619  $ ldu )
620  CALL slacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1, 1 ),
621  $ ldvt )
622  END IF
623 *
624 * Copy CTOT into COLTYP for referencing in SLASD3.
625 *
626  DO 190 j = 1, 4
627  coltyp( j ) = ctot( j )
628  190 continue
629 *
630  return
631 *
632 * End of SLASD2
633 *
634  END