LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
slasd2.f
Go to the documentation of this file.
1*> \brief \b SLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SLASD2 + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasd2.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasd2.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasd2.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
22* LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
23* IDXC, IDXQ, COLTYP, INFO )
24*
25* .. Scalar Arguments ..
26* INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
27* REAL ALPHA, BETA
28* ..
29* .. Array Arguments ..
30* INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
31* $ IDXQ( * )
32* REAL D( * ), DSIGMA( * ), U( LDU, * ),
33* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
34* $ Z( * )
35* ..
36*
37*
38*> \par Purpose:
39* =============
40*>
41*> \verbatim
42*>
43*> SLASD2 merges the two sets of singular values together into a single
44*> sorted set. Then it tries to deflate the size of the problem.
45*> There are two ways in which deflation can occur: when two or more
46*> singular values are close together or if there is a tiny entry in the
47*> Z vector. For each such occurrence the order of the related secular
48*> equation problem is reduced by one.
49*>
50*> SLASD2 is called from SLASD1.
51*> \endverbatim
52*
53* Arguments:
54* ==========
55*
56*> \param[in] NL
57*> \verbatim
58*> NL is INTEGER
59*> The row dimension of the upper block. NL >= 1.
60*> \endverbatim
61*>
62*> \param[in] NR
63*> \verbatim
64*> NR is INTEGER
65*> The row dimension of the lower block. NR >= 1.
66*> \endverbatim
67*>
68*> \param[in] SQRE
69*> \verbatim
70*> SQRE is INTEGER
71*> = 0: the lower block is an NR-by-NR square matrix.
72*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
73*>
74*> The bidiagonal matrix has N = NL + NR + 1 rows and
75*> M = N + SQRE >= N columns.
76*> \endverbatim
77*>
78*> \param[out] K
79*> \verbatim
80*> K is INTEGER
81*> Contains the dimension of the non-deflated matrix,
82*> This is the order of the related secular equation. 1 <= K <=N.
83*> \endverbatim
84*>
85*> \param[in,out] D
86*> \verbatim
87*> D is REAL array, dimension (N)
88*> On entry D contains the singular values of the two submatrices
89*> to be combined. On exit D contains the trailing (N-K) updated
90*> singular values (those which were deflated) sorted into
91*> increasing order.
92*> \endverbatim
93*>
94*> \param[out] Z
95*> \verbatim
96*> Z is REAL array, dimension (N)
97*> On exit Z contains the updating row vector in the secular
98*> equation.
99*> \endverbatim
100*>
101*> \param[in] ALPHA
102*> \verbatim
103*> ALPHA is REAL
104*> Contains the diagonal element associated with the added row.
105*> \endverbatim
106*>
107*> \param[in] BETA
108*> \verbatim
109*> BETA is REAL
110*> Contains the off-diagonal element associated with the added
111*> row.
112*> \endverbatim
113*>
114*> \param[in,out] U
115*> \verbatim
116*> U is REAL array, dimension (LDU,N)
117*> On entry U contains the left singular vectors of two
118*> submatrices in the two square blocks with corners at (1,1),
119*> (NL, NL), and (NL+2, NL+2), (N,N).
120*> On exit U contains the trailing (N-K) updated left singular
121*> vectors (those which were deflated) in its last N-K columns.
122*> \endverbatim
123*>
124*> \param[in] LDU
125*> \verbatim
126*> LDU is INTEGER
127*> The leading dimension of the array U. LDU >= N.
128*> \endverbatim
129*>
130*> \param[in,out] VT
131*> \verbatim
132*> VT is REAL array, dimension (LDVT,M)
133*> On entry VT**T contains the right singular vectors of two
134*> submatrices in the two square blocks with corners at (1,1),
135*> (NL+1, NL+1), and (NL+2, NL+2), (M,M).
136*> On exit VT**T contains the trailing (N-K) updated right singular
137*> vectors (those which were deflated) in its last N-K columns.
138*> In case SQRE =1, the last row of VT spans the right null
139*> space.
140*> \endverbatim
141*>
142*> \param[in] LDVT
143*> \verbatim
144*> LDVT is INTEGER
145*> The leading dimension of the array VT. LDVT >= M.
146*> \endverbatim
147*>
148*> \param[out] DSIGMA
149*> \verbatim
150*> DSIGMA is REAL array, dimension (N)
151*> Contains a copy of the diagonal elements (K-1 singular values
152*> and one zero) in the secular equation.
153*> \endverbatim
154*>
155*> \param[out] U2
156*> \verbatim
157*> U2 is REAL array, dimension (LDU2,N)
158*> Contains a copy of the first K-1 left singular vectors which
159*> will be used by SLASD3 in a matrix multiply (SGEMM) to solve
160*> for the new left singular vectors. U2 is arranged into four
161*> blocks. The first block contains a column with 1 at NL+1 and
162*> zero everywhere else; the second block contains non-zero
163*> entries only at and above NL; the third contains non-zero
164*> entries only below NL+1; and the fourth is dense.
165*> \endverbatim
166*>
167*> \param[in] LDU2
168*> \verbatim
169*> LDU2 is INTEGER
170*> The leading dimension of the array U2. LDU2 >= N.
171*> \endverbatim
172*>
173*> \param[out] VT2
174*> \verbatim
175*> VT2 is REAL array, dimension (LDVT2,N)
176*> VT2**T contains a copy of the first K right singular vectors
177*> which will be used by SLASD3 in a matrix multiply (SGEMM) to
178*> solve for the new right singular vectors. VT2 is arranged into
179*> three blocks. The first block contains a row that corresponds
180*> to the special 0 diagonal element in SIGMA; the second block
181*> contains non-zeros only at and before NL +1; the third block
182*> contains non-zeros only at and after NL +2.
183*> \endverbatim
184*>
185*> \param[in] LDVT2
186*> \verbatim
187*> LDVT2 is INTEGER
188*> The leading dimension of the array VT2. LDVT2 >= M.
189*> \endverbatim
190*>
191*> \param[out] IDXP
192*> \verbatim
193*> IDXP is INTEGER array, dimension (N)
194*> This will contain the permutation used to place deflated
195*> values of D at the end of the array. On output IDXP(2:K)
196*> points to the nondeflated D-values and IDXP(K+1:N)
197*> points to the deflated singular values.
198*> \endverbatim
199*>
200*> \param[out] IDX
201*> \verbatim
202*> IDX is INTEGER array, dimension (N)
203*> This will contain the permutation used to sort the contents of
204*> D into ascending order.
205*> \endverbatim
206*>
207*> \param[out] IDXC
208*> \verbatim
209*> IDXC is INTEGER array, dimension (N)
210*> This will contain the permutation used to arrange the columns
211*> of the deflated U matrix into three groups: the first group
212*> contains non-zero entries only at and above NL, the second
213*> contains non-zero entries only below NL+2, and the third is
214*> dense.
215*> \endverbatim
216*>
217*> \param[in,out] IDXQ
218*> \verbatim
219*> IDXQ is INTEGER array, dimension (N)
220*> This contains the permutation which separately sorts the two
221*> sub-problems in D into ascending order. Note that entries in
222*> the first hlaf of this permutation must first be moved one
223*> position backward; and entries in the second half
224*> must first have NL+1 added to their values.
225*> \endverbatim
226*>
227*> \param[out] COLTYP
228*> \verbatim
229*> COLTYP is INTEGER array, dimension (N)
230*> As workspace, this will contain a label which will indicate
231*> which of the following types a column in the U2 matrix or a
232*> row in the VT2 matrix is:
233*> 1 : non-zero in the upper half only
234*> 2 : non-zero in the lower half only
235*> 3 : dense
236*> 4 : deflated
237*>
238*> On exit, it is an array of dimension 4, with COLTYP(I) being
239*> the dimension of the I-th type columns.
240*> \endverbatim
241*>
242*> \param[out] INFO
243*> \verbatim
244*> INFO is INTEGER
245*> = 0: successful exit.
246*> < 0: if INFO = -i, the i-th argument had an illegal value.
247*> \endverbatim
248*
249* Authors:
250* ========
251*
252*> \author Univ. of Tennessee
253*> \author Univ. of California Berkeley
254*> \author Univ. of Colorado Denver
255*> \author NAG Ltd.
256*
257*> \ingroup lasd2
258*
259*> \par Contributors:
260* ==================
261*>
262*> Ming Gu and Huan Ren, Computer Science Division, University of
263*> California at Berkeley, USA
264*>
265* =====================================================================
266 SUBROUTINE slasd2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
267 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
268 $ IDXC, IDXQ, COLTYP, INFO )
269*
270* -- LAPACK auxiliary routine --
271* -- LAPACK is a software package provided by Univ. of Tennessee, --
272* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
273*
274* .. Scalar Arguments ..
275 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
276 REAL ALPHA, BETA
277* ..
278* .. Array Arguments ..
279 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
280 $ IDXQ( * )
281 REAL D( * ), DSIGMA( * ), U( LDU, * ),
282 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
283 $ z( * )
284* ..
285*
286* =====================================================================
287*
288* .. Parameters ..
289 REAL ZERO, ONE, TWO, EIGHT
290 PARAMETER ( ZERO = 0.0e+0, one = 1.0e+0, two = 2.0e+0,
291 $ eight = 8.0e+0 )
292* ..
293* .. Local Arrays ..
294 INTEGER CTOT( 4 ), PSM( 4 )
295* ..
296* .. Local Scalars ..
297 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
298 $ N, NLP1, NLP2
299 REAL C, EPS, HLFTOL, S, TAU, TOL, Z1
300* ..
301* .. External Functions ..
302 REAL SLAMCH, SLAPY2
303 EXTERNAL SLAMCH, SLAPY2
304* ..
305* .. External Subroutines ..
306 EXTERNAL scopy, slacpy, slamrg, slaset, srot, xerbla
307* ..
308* .. Intrinsic Functions ..
309 INTRINSIC abs, max
310* ..
311* .. Executable Statements ..
312*
313* Test the input parameters.
314*
315 info = 0
316*
317 IF( nl.LT.1 ) THEN
318 info = -1
319 ELSE IF( nr.LT.1 ) THEN
320 info = -2
321 ELSE IF( ( sqre.NE.1 ) .AND. ( sqre.NE.0 ) ) THEN
322 info = -3
323 END IF
324*
325 n = nl + nr + 1
326 m = n + sqre
327*
328 IF( ldu.LT.n ) THEN
329 info = -10
330 ELSE IF( ldvt.LT.m ) THEN
331 info = -12
332 ELSE IF( ldu2.LT.n ) THEN
333 info = -15
334 ELSE IF( ldvt2.LT.m ) THEN
335 info = -17
336 END IF
337 IF( info.NE.0 ) THEN
338 CALL xerbla( 'SLASD2', -info )
339 RETURN
340 END IF
341*
342 nlp1 = nl + 1
343 nlp2 = nl + 2
344*
345* Generate the first part of the vector Z; and move the singular
346* values in the first part of D one position backward.
347*
348 z1 = alpha*vt( nlp1, nlp1 )
349 z( 1 ) = z1
350 DO 10 i = nl, 1, -1
351 z( i+1 ) = alpha*vt( i, nlp1 )
352 d( i+1 ) = d( i )
353 idxq( i+1 ) = idxq( i ) + 1
354 10 CONTINUE
355*
356* Generate the second part of the vector Z.
357*
358 DO 20 i = nlp2, m
359 z( i ) = beta*vt( i, nlp2 )
360 20 CONTINUE
361*
362* Initialize some reference arrays.
363*
364 DO 30 i = 2, nlp1
365 coltyp( i ) = 1
366 30 CONTINUE
367 DO 40 i = nlp2, n
368 coltyp( i ) = 2
369 40 CONTINUE
370*
371* Sort the singular values into increasing order
372*
373 DO 50 i = nlp2, n
374 idxq( i ) = idxq( i ) + nlp1
375 50 CONTINUE
376*
377* DSIGMA, IDXC, IDXC, and the first column of U2
378* are used as storage space.
379*
380 DO 60 i = 2, n
381 dsigma( i ) = d( idxq( i ) )
382 u2( i, 1 ) = z( idxq( i ) )
383 idxc( i ) = coltyp( idxq( i ) )
384 60 CONTINUE
385*
386 CALL slamrg( nl, nr, dsigma( 2 ), 1, 1, idx( 2 ) )
387*
388 DO 70 i = 2, n
389 idxi = 1 + idx( i )
390 d( i ) = dsigma( idxi )
391 z( i ) = u2( idxi, 1 )
392 coltyp( i ) = idxc( idxi )
393 70 CONTINUE
394*
395* Calculate the allowable deflation tolerance
396*
397 eps = slamch( 'Epsilon' )
398 tol = max( abs( alpha ), abs( beta ) )
399 tol = eight*eps*max( abs( d( n ) ), tol )
400*
401* There are 2 kinds of deflation -- first a value in the z-vector
402* is small, second two (or more) singular values are very close
403* together (their difference is small).
404*
405* If the value in the z-vector is small, we simply permute the
406* array so that the corresponding singular value is moved to the
407* end.
408*
409* If two values in the D-vector are close, we perform a two-sided
410* rotation designed to make one of the corresponding z-vector
411* entries zero, and then permute the array so that the deflated
412* singular value is moved to the end.
413*
414* If there are multiple singular values then the problem deflates.
415* Here the number of equal singular values are found. As each equal
416* singular value is found, an elementary reflector is computed to
417* rotate the corresponding singular subspace so that the
418* corresponding components of Z are zero in this new basis.
419*
420 k = 1
421 k2 = n + 1
422 DO 80 j = 2, n
423 IF( abs( z( j ) ).LE.tol ) THEN
424*
425* Deflate due to small z component.
426*
427 k2 = k2 - 1
428 idxp( k2 ) = j
429 coltyp( j ) = 4
430 IF( j.EQ.n )
431 $ GO TO 120
432 ELSE
433 jprev = j
434 GO TO 90
435 END IF
436 80 CONTINUE
437 90 CONTINUE
438 j = jprev
439 100 CONTINUE
440 j = j + 1
441 IF( j.GT.n )
442 $ GO TO 110
443 IF( abs( z( j ) ).LE.tol ) THEN
444*
445* Deflate due to small z component.
446*
447 k2 = k2 - 1
448 idxp( k2 ) = j
449 coltyp( j ) = 4
450 ELSE
451*
452* Check if singular values are close enough to allow deflation.
453*
454 IF( abs( d( j )-d( jprev ) ).LE.tol ) THEN
455*
456* Deflation is possible.
457*
458 s = z( jprev )
459 c = z( j )
460*
461* Find sqrt(a**2+b**2) without overflow or
462* destructive underflow.
463*
464 tau = slapy2( c, s )
465 c = c / tau
466 s = -s / tau
467 z( j ) = tau
468 z( jprev ) = zero
469*
470* Apply back the Givens rotation to the left and right
471* singular vector matrices.
472*
473 idxjp = idxq( idx( jprev )+1 )
474 idxj = idxq( idx( j )+1 )
475 IF( idxjp.LE.nlp1 ) THEN
476 idxjp = idxjp - 1
477 END IF
478 IF( idxj.LE.nlp1 ) THEN
479 idxj = idxj - 1
480 END IF
481 CALL srot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s )
482 CALL srot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt, c,
483 $ s )
484 IF( coltyp( j ).NE.coltyp( jprev ) ) THEN
485 coltyp( j ) = 3
486 END IF
487 coltyp( jprev ) = 4
488 k2 = k2 - 1
489 idxp( k2 ) = jprev
490 jprev = j
491 ELSE
492 k = k + 1
493 u2( k, 1 ) = z( jprev )
494 dsigma( k ) = d( jprev )
495 idxp( k ) = jprev
496 jprev = j
497 END IF
498 END IF
499 GO TO 100
500 110 CONTINUE
501*
502* Record the last singular value.
503*
504 k = k + 1
505 u2( k, 1 ) = z( jprev )
506 dsigma( k ) = d( jprev )
507 idxp( k ) = jprev
508*
509 120 CONTINUE
510*
511* Count up the total number of the various types of columns, then
512* form a permutation which positions the four column types into
513* four groups of uniform structure (although one or more of these
514* groups may be empty).
515*
516 DO 130 j = 1, 4
517 ctot( j ) = 0
518 130 CONTINUE
519 DO 140 j = 2, n
520 ct = coltyp( j )
521 ctot( ct ) = ctot( ct ) + 1
522 140 CONTINUE
523*
524* PSM(*) = Position in SubMatrix (of types 1 through 4)
525*
526 psm( 1 ) = 2
527 psm( 2 ) = 2 + ctot( 1 )
528 psm( 3 ) = psm( 2 ) + ctot( 2 )
529 psm( 4 ) = psm( 3 ) + ctot( 3 )
530*
531* Fill out the IDXC array so that the permutation which it induces
532* will place all type-1 columns first, all type-2 columns next,
533* then all type-3's, and finally all type-4's, starting from the
534* second column. This applies similarly to the rows of VT.
535*
536 DO 150 j = 2, n
537 jp = idxp( j )
538 ct = coltyp( jp )
539 idxc( psm( ct ) ) = j
540 psm( ct ) = psm( ct ) + 1
541 150 CONTINUE
542*
543* Sort the singular values and corresponding singular vectors into
544* DSIGMA, U2, and VT2 respectively. The singular values/vectors
545* which were not deflated go into the first K slots of DSIGMA, U2,
546* and VT2 respectively, while those which were deflated go into the
547* last N - K slots, except that the first column/row will be treated
548* separately.
549*
550 DO 160 j = 2, n
551 jp = idxp( j )
552 dsigma( j ) = d( jp )
553 idxj = idxq( idx( idxp( idxc( j ) ) )+1 )
554 IF( idxj.LE.nlp1 ) THEN
555 idxj = idxj - 1
556 END IF
557 CALL scopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 )
558 CALL scopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 )
559 160 CONTINUE
560*
561* Determine DSIGMA(1), DSIGMA(2) and Z(1)
562*
563 dsigma( 1 ) = zero
564 hlftol = tol / two
565 IF( abs( dsigma( 2 ) ).LE.hlftol )
566 $ dsigma( 2 ) = hlftol
567 IF( m.GT.n ) THEN
568 z( 1 ) = slapy2( z1, z( m ) )
569 IF( z( 1 ).LE.tol ) THEN
570 c = one
571 s = zero
572 z( 1 ) = tol
573 ELSE
574 c = z1 / z( 1 )
575 s = z( m ) / z( 1 )
576 END IF
577 ELSE
578 IF( abs( z1 ).LE.tol ) THEN
579 z( 1 ) = tol
580 ELSE
581 z( 1 ) = z1
582 END IF
583 END IF
584*
585* Move the rest of the updating row to Z.
586*
587 CALL scopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 )
588*
589* Determine the first column of U2, the first row of VT2 and the
590* last row of VT.
591*
592 CALL slaset( 'A', n, 1, zero, zero, u2, ldu2 )
593 u2( nlp1, 1 ) = one
594 IF( m.GT.n ) THEN
595 DO 170 i = 1, nlp1
596 vt( m, i ) = -s*vt( nlp1, i )
597 vt2( 1, i ) = c*vt( nlp1, i )
598 170 CONTINUE
599 DO 180 i = nlp2, m
600 vt2( 1, i ) = s*vt( m, i )
601 vt( m, i ) = c*vt( m, i )
602 180 CONTINUE
603 ELSE
604 CALL scopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 )
605 END IF
606 IF( m.GT.n ) THEN
607 CALL scopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 )
608 END IF
609*
610* The deflated singular values and their corresponding vectors go
611* into the back of D, U, and V respectively.
612*
613 IF( n.GT.k ) THEN
614 CALL scopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 )
615 CALL slacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),
616 $ ldu )
617 CALL slacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1, 1 ),
618 $ ldvt )
619 END IF
620*
621* Copy CTOT into COLTYP for referencing in SLASD3.
622*
623 DO 190 j = 1, 4
624 coltyp( j ) = ctot( j )
625 190 CONTINUE
626*
627 RETURN
628*
629* End of SLASD2
630*
631 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine scopy(n, sx, incx, sy, incy)
SCOPY
Definition scopy.f:82
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
subroutine slamrg(n1, n2, a, strd1, strd2, index)
SLAMRG creates a permutation list to merge the entries of two independently sorted sets into a single...
Definition slamrg.f:99
subroutine slasd2(nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt, ldvt, dsigma, u2, ldu2, vt2, ldvt2, idxp, idx, idxc, idxq, coltyp, info)
SLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
Definition slasd2.f:269
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
subroutine srot(n, sx, incx, sy, incy, c, s)
SROT
Definition srot.f:92