LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dlasd2.f
Go to the documentation of this file.
1*> \brief \b DLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download DLASD2 + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasd2.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasd2.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasd2.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
20* LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
21* IDXC, IDXQ, COLTYP, INFO )
22*
23* .. Scalar Arguments ..
24* INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
25* DOUBLE PRECISION ALPHA, BETA
26* ..
27* .. Array Arguments ..
28* INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
29* $ IDXQ( * )
30* DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ),
31* $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
32* $ Z( * )
33* ..
34*
35*
36*> \par Purpose:
37* =============
38*>
39*> \verbatim
40*>
41*> DLASD2 merges the two sets of singular values together into a single
42*> sorted set. Then it tries to deflate the size of the problem.
43*> There are two ways in which deflation can occur: when two or more
44*> singular values are close together or if there is a tiny entry in the
45*> Z vector. For each such occurrence the order of the related secular
46*> equation problem is reduced by one.
47*>
48*> DLASD2 is called from DLASD1.
49*> \endverbatim
50*
51* Arguments:
52* ==========
53*
54*> \param[in] NL
55*> \verbatim
56*> NL is INTEGER
57*> The row dimension of the upper block. NL >= 1.
58*> \endverbatim
59*>
60*> \param[in] NR
61*> \verbatim
62*> NR is INTEGER
63*> The row dimension of the lower block. NR >= 1.
64*> \endverbatim
65*>
66*> \param[in] SQRE
67*> \verbatim
68*> SQRE is INTEGER
69*> = 0: the lower block is an NR-by-NR square matrix.
70*> = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
71*>
72*> The bidiagonal matrix has N = NL + NR + 1 rows and
73*> M = N + SQRE >= N columns.
74*> \endverbatim
75*>
76*> \param[out] K
77*> \verbatim
78*> K is INTEGER
79*> Contains the dimension of the non-deflated matrix,
80*> This is the order of the related secular equation. 1 <= K <=N.
81*> \endverbatim
82*>
83*> \param[in,out] D
84*> \verbatim
85*> D is DOUBLE PRECISION array, dimension(N)
86*> On entry D contains the singular values of the two submatrices
87*> to be combined. On exit D contains the trailing (N-K) updated
88*> singular values (those which were deflated) sorted into
89*> increasing order.
90*> \endverbatim
91*>
92*> \param[out] Z
93*> \verbatim
94*> Z is DOUBLE PRECISION array, dimension(N)
95*> On exit Z contains the updating row vector in the secular
96*> equation.
97*> \endverbatim
98*>
99*> \param[in] ALPHA
100*> \verbatim
101*> ALPHA is DOUBLE PRECISION
102*> Contains the diagonal element associated with the added row.
103*> \endverbatim
104*>
105*> \param[in] BETA
106*> \verbatim
107*> BETA is DOUBLE PRECISION
108*> Contains the off-diagonal element associated with the added
109*> row.
110*> \endverbatim
111*>
112*> \param[in,out] U
113*> \verbatim
114*> U is DOUBLE PRECISION array, dimension(LDU,N)
115*> On entry U contains the left singular vectors of two
116*> submatrices in the two square blocks with corners at (1,1),
117*> (NL, NL), and (NL+2, NL+2), (N,N).
118*> On exit U contains the trailing (N-K) updated left singular
119*> vectors (those which were deflated) in its last N-K columns.
120*> \endverbatim
121*>
122*> \param[in] LDU
123*> \verbatim
124*> LDU is INTEGER
125*> The leading dimension of the array U. LDU >= N.
126*> \endverbatim
127*>
128*> \param[in,out] VT
129*> \verbatim
130*> VT is DOUBLE PRECISION array, dimension(LDVT,M)
131*> On entry VT**T contains the right singular vectors of two
132*> submatrices in the two square blocks with corners at (1,1),
133*> (NL+1, NL+1), and (NL+2, NL+2), (M,M).
134*> On exit VT**T contains the trailing (N-K) updated right singular
135*> vectors (those which were deflated) in its last N-K columns.
136*> In case SQRE =1, the last row of VT spans the right null
137*> space.
138*> \endverbatim
139*>
140*> \param[in] LDVT
141*> \verbatim
142*> LDVT is INTEGER
143*> The leading dimension of the array VT. LDVT >= M.
144*> \endverbatim
145*>
146*> \param[out] DSIGMA
147*> \verbatim
148*> DSIGMA is DOUBLE PRECISION array, dimension (N)
149*> Contains a copy of the diagonal elements (K-1 singular values
150*> and one zero) in the secular equation.
151*> \endverbatim
152*>
153*> \param[out] U2
154*> \verbatim
155*> U2 is DOUBLE PRECISION array, dimension(LDU2,N)
156*> Contains a copy of the first K-1 left singular vectors which
157*> will be used by DLASD3 in a matrix multiply (DGEMM) to solve
158*> for the new left singular vectors. U2 is arranged into four
159*> blocks. The first block contains a column with 1 at NL+1 and
160*> zero everywhere else; the second block contains non-zero
161*> entries only at and above NL; the third contains non-zero
162*> entries only below NL+1; and the fourth is dense.
163*> \endverbatim
164*>
165*> \param[in] LDU2
166*> \verbatim
167*> LDU2 is INTEGER
168*> The leading dimension of the array U2. LDU2 >= N.
169*> \endverbatim
170*>
171*> \param[out] VT2
172*> \verbatim
173*> VT2 is DOUBLE PRECISION array, dimension(LDVT2,N)
174*> VT2**T contains a copy of the first K right singular vectors
175*> which will be used by DLASD3 in a matrix multiply (DGEMM) to
176*> solve for the new right singular vectors. VT2 is arranged into
177*> three blocks. The first block contains a row that corresponds
178*> to the special 0 diagonal element in SIGMA; the second block
179*> contains non-zeros only at and before NL +1; the third block
180*> contains non-zeros only at and after NL +2.
181*> \endverbatim
182*>
183*> \param[in] LDVT2
184*> \verbatim
185*> LDVT2 is INTEGER
186*> The leading dimension of the array VT2. LDVT2 >= M.
187*> \endverbatim
188*>
189*> \param[out] IDXP
190*> \verbatim
191*> IDXP is INTEGER array, dimension(N)
192*> This will contain the permutation used to place deflated
193*> values of D at the end of the array. On output IDXP(2:K)
194*> points to the nondeflated D-values and IDXP(K+1:N)
195*> points to the deflated singular values.
196*> \endverbatim
197*>
198*> \param[out] IDX
199*> \verbatim
200*> IDX is INTEGER array, dimension(N)
201*> This will contain the permutation used to sort the contents of
202*> D into ascending order.
203*> \endverbatim
204*>
205*> \param[out] IDXC
206*> \verbatim
207*> IDXC is INTEGER array, dimension(N)
208*> This will contain the permutation used to arrange the columns
209*> of the deflated U matrix into three groups: the first group
210*> contains non-zero entries only at and above NL, the second
211*> contains non-zero entries only below NL+2, and the third is
212*> dense.
213*> \endverbatim
214*>
215*> \param[in,out] IDXQ
216*> \verbatim
217*> IDXQ is INTEGER array, dimension(N)
218*> This contains the permutation which separately sorts the two
219*> sub-problems in D into ascending order. Note that entries in
220*> the first hlaf of this permutation must first be moved one
221*> position backward; and entries in the second half
222*> must first have NL+1 added to their values.
223*> \endverbatim
224*>
225*> \param[out] COLTYP
226*> \verbatim
227*> COLTYP is INTEGER array, dimension(N)
228*> As workspace, this will contain a label which will indicate
229*> which of the following types a column in the U2 matrix or a
230*> row in the VT2 matrix is:
231*> 1 : non-zero in the upper half only
232*> 2 : non-zero in the lower half only
233*> 3 : dense
234*> 4 : deflated
235*>
236*> On exit, it is an array of dimension 4, with COLTYP(I) being
237*> the dimension of the I-th type columns.
238*> \endverbatim
239*>
240*> \param[out] INFO
241*> \verbatim
242*> INFO is INTEGER
243*> = 0: successful exit.
244*> < 0: if INFO = -i, the i-th argument had an illegal value.
245*> \endverbatim
246*
247* Authors:
248* ========
249*
250*> \author Univ. of Tennessee
251*> \author Univ. of California Berkeley
252*> \author Univ. of Colorado Denver
253*> \author NAG Ltd.
254*
255*> \ingroup lasd2
256*
257*> \par Contributors:
258* ==================
259*>
260*> Ming Gu and Huan Ren, Computer Science Division, University of
261*> California at Berkeley, USA
262*>
263* =====================================================================
264 SUBROUTINE dlasd2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU,
265 $ VT,
266 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
267 $ IDXC, IDXQ, COLTYP, INFO )
268*
269* -- LAPACK auxiliary routine --
270* -- LAPACK is a software package provided by Univ. of Tennessee, --
271* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
272*
273* .. Scalar Arguments ..
274 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
275 DOUBLE PRECISION ALPHA, BETA
276* ..
277* .. Array Arguments ..
278 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
279 $ IDXQ( * )
280 DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ),
281 $ u2( ldu2, * ), vt( ldvt, * ), vt2( ldvt2, * ),
282 $ z( * )
283* ..
284*
285* =====================================================================
286*
287* .. Parameters ..
288 DOUBLE PRECISION ZERO, ONE, TWO, EIGHT
289 PARAMETER ( ZERO = 0.0d+0, one = 1.0d+0, two = 2.0d+0,
290 $ eight = 8.0d+0 )
291* ..
292* .. Local Arrays ..
293 INTEGER CTOT( 4 ), PSM( 4 )
294* ..
295* .. Local Scalars ..
296 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
297 $ N, NLP1, NLP2
298 DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1
299* ..
300* .. External Functions ..
301 DOUBLE PRECISION DLAMCH, DLAPY2
302 EXTERNAL DLAMCH, DLAPY2
303* ..
304* .. External Subroutines ..
305 EXTERNAL dcopy, dlacpy, dlamrg, dlaset, drot,
306 $ xerbla
307* ..
308* .. Intrinsic Functions ..
309 INTRINSIC abs, max
310* ..
311* .. Executable Statements ..
312*
313* Test the input parameters.
314*
315 info = 0
316*
317 IF( nl.LT.1 ) THEN
318 info = -1
319 ELSE IF( nr.LT.1 ) THEN
320 info = -2
321 ELSE IF( ( sqre.NE.1 ) .AND. ( sqre.NE.0 ) ) THEN
322 info = -3
323 END IF
324*
325 n = nl + nr + 1
326 m = n + sqre
327*
328 IF( ldu.LT.n ) THEN
329 info = -10
330 ELSE IF( ldvt.LT.m ) THEN
331 info = -12
332 ELSE IF( ldu2.LT.n ) THEN
333 info = -15
334 ELSE IF( ldvt2.LT.m ) THEN
335 info = -17
336 END IF
337 IF( info.NE.0 ) THEN
338 CALL xerbla( 'DLASD2', -info )
339 RETURN
340 END IF
341*
342 nlp1 = nl + 1
343 nlp2 = nl + 2
344*
345* Generate the first part of the vector Z; and move the singular
346* values in the first part of D one position backward.
347*
348 z1 = alpha*vt( nlp1, nlp1 )
349 z( 1 ) = z1
350 DO 10 i = nl, 1, -1
351 z( i+1 ) = alpha*vt( i, nlp1 )
352 d( i+1 ) = d( i )
353 idxq( i+1 ) = idxq( i ) + 1
354 10 CONTINUE
355*
356* Generate the second part of the vector Z.
357*
358 DO 20 i = nlp2, m
359 z( i ) = beta*vt( i, nlp2 )
360 20 CONTINUE
361*
362* Initialize some reference arrays.
363*
364 DO 30 i = 2, nlp1
365 coltyp( i ) = 1
366 30 CONTINUE
367 DO 40 i = nlp2, n
368 coltyp( i ) = 2
369 40 CONTINUE
370*
371* Sort the singular values into increasing order
372*
373 DO 50 i = nlp2, n
374 idxq( i ) = idxq( i ) + nlp1
375 50 CONTINUE
376*
377* DSIGMA, IDXC, IDXC, and the first column of U2
378* are used as storage space.
379*
380 DO 60 i = 2, n
381 dsigma( i ) = d( idxq( i ) )
382 u2( i, 1 ) = z( idxq( i ) )
383 idxc( i ) = coltyp( idxq( i ) )
384 60 CONTINUE
385*
386 CALL dlamrg( nl, nr, dsigma( 2 ), 1, 1, idx( 2 ) )
387*
388 DO 70 i = 2, n
389 idxi = 1 + idx( i )
390 d( i ) = dsigma( idxi )
391 z( i ) = u2( idxi, 1 )
392 coltyp( i ) = idxc( idxi )
393 70 CONTINUE
394*
395* Calculate the allowable deflation tolerance
396*
397 eps = dlamch( 'Epsilon' )
398 tol = max( abs( alpha ), abs( beta ) )
399 tol = eight*eps*max( abs( d( n ) ), tol )
400*
401* There are 2 kinds of deflation -- first a value in the z-vector
402* is small, second two (or more) singular values are very close
403* together (their difference is small).
404*
405* If the value in the z-vector is small, we simply permute the
406* array so that the corresponding singular value is moved to the
407* end.
408*
409* If two values in the D-vector are close, we perform a two-sided
410* rotation designed to make one of the corresponding z-vector
411* entries zero, and then permute the array so that the deflated
412* singular value is moved to the end.
413*
414* If there are multiple singular values then the problem deflates.
415* Here the number of equal singular values are found. As each equal
416* singular value is found, an elementary reflector is computed to
417* rotate the corresponding singular subspace so that the
418* corresponding components of Z are zero in this new basis.
419*
420 k = 1
421 k2 = n + 1
422 DO 80 j = 2, n
423 IF( abs( z( j ) ).LE.tol ) THEN
424*
425* Deflate due to small z component.
426*
427 k2 = k2 - 1
428 idxp( k2 ) = j
429 coltyp( j ) = 4
430 IF( j.EQ.n )
431 $ GO TO 120
432 ELSE
433 jprev = j
434 GO TO 90
435 END IF
436 80 CONTINUE
437 90 CONTINUE
438 j = jprev
439 100 CONTINUE
440 j = j + 1
441 IF( j.GT.n )
442 $ GO TO 110
443 IF( abs( z( j ) ).LE.tol ) THEN
444*
445* Deflate due to small z component.
446*
447 k2 = k2 - 1
448 idxp( k2 ) = j
449 coltyp( j ) = 4
450 ELSE
451*
452* Check if singular values are close enough to allow deflation.
453*
454 IF( abs( d( j )-d( jprev ) ).LE.tol ) THEN
455*
456* Deflation is possible.
457*
458 s = z( jprev )
459 c = z( j )
460*
461* Find sqrt(a**2+b**2) without overflow or
462* destructive underflow.
463*
464 tau = dlapy2( c, s )
465 c = c / tau
466 s = -s / tau
467 z( j ) = tau
468 z( jprev ) = zero
469*
470* Apply back the Givens rotation to the left and right
471* singular vector matrices.
472*
473 idxjp = idxq( idx( jprev )+1 )
474 idxj = idxq( idx( j )+1 )
475 IF( idxjp.LE.nlp1 ) THEN
476 idxjp = idxjp - 1
477 END IF
478 IF( idxj.LE.nlp1 ) THEN
479 idxj = idxj - 1
480 END IF
481 CALL drot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s )
482 CALL drot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt,
483 $ c,
484 $ s )
485 IF( coltyp( j ).NE.coltyp( jprev ) ) THEN
486 coltyp( j ) = 3
487 END IF
488 coltyp( jprev ) = 4
489 k2 = k2 - 1
490 idxp( k2 ) = jprev
491 jprev = j
492 ELSE
493 k = k + 1
494 u2( k, 1 ) = z( jprev )
495 dsigma( k ) = d( jprev )
496 idxp( k ) = jprev
497 jprev = j
498 END IF
499 END IF
500 GO TO 100
501 110 CONTINUE
502*
503* Record the last singular value.
504*
505 k = k + 1
506 u2( k, 1 ) = z( jprev )
507 dsigma( k ) = d( jprev )
508 idxp( k ) = jprev
509*
510 120 CONTINUE
511*
512* Count up the total number of the various types of columns, then
513* form a permutation which positions the four column types into
514* four groups of uniform structure (although one or more of these
515* groups may be empty).
516*
517 DO 130 j = 1, 4
518 ctot( j ) = 0
519 130 CONTINUE
520 DO 140 j = 2, n
521 ct = coltyp( j )
522 ctot( ct ) = ctot( ct ) + 1
523 140 CONTINUE
524*
525* PSM(*) = Position in SubMatrix (of types 1 through 4)
526*
527 psm( 1 ) = 2
528 psm( 2 ) = 2 + ctot( 1 )
529 psm( 3 ) = psm( 2 ) + ctot( 2 )
530 psm( 4 ) = psm( 3 ) + ctot( 3 )
531*
532* Fill out the IDXC array so that the permutation which it induces
533* will place all type-1 columns first, all type-2 columns next,
534* then all type-3's, and finally all type-4's, starting from the
535* second column. This applies similarly to the rows of VT.
536*
537 DO 150 j = 2, n
538 jp = idxp( j )
539 ct = coltyp( jp )
540 idxc( psm( ct ) ) = j
541 psm( ct ) = psm( ct ) + 1
542 150 CONTINUE
543*
544* Sort the singular values and corresponding singular vectors into
545* DSIGMA, U2, and VT2 respectively. The singular values/vectors
546* which were not deflated go into the first K slots of DSIGMA, U2,
547* and VT2 respectively, while those which were deflated go into the
548* last N - K slots, except that the first column/row will be treated
549* separately.
550*
551 DO 160 j = 2, n
552 jp = idxp( j )
553 dsigma( j ) = d( jp )
554 idxj = idxq( idx( idxp( idxc( j ) ) )+1 )
555 IF( idxj.LE.nlp1 ) THEN
556 idxj = idxj - 1
557 END IF
558 CALL dcopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 )
559 CALL dcopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 )
560 160 CONTINUE
561*
562* Determine DSIGMA(1), DSIGMA(2) and Z(1)
563*
564 dsigma( 1 ) = zero
565 hlftol = tol / two
566 IF( abs( dsigma( 2 ) ).LE.hlftol )
567 $ dsigma( 2 ) = hlftol
568 IF( m.GT.n ) THEN
569 z( 1 ) = dlapy2( z1, z( m ) )
570 IF( z( 1 ).LE.tol ) THEN
571 c = one
572 s = zero
573 z( 1 ) = tol
574 ELSE
575 c = z1 / z( 1 )
576 s = z( m ) / z( 1 )
577 END IF
578 ELSE
579 IF( abs( z1 ).LE.tol ) THEN
580 z( 1 ) = tol
581 ELSE
582 z( 1 ) = z1
583 END IF
584 END IF
585*
586* Move the rest of the updating row to Z.
587*
588 CALL dcopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 )
589*
590* Determine the first column of U2, the first row of VT2 and the
591* last row of VT.
592*
593 CALL dlaset( 'A', n, 1, zero, zero, u2, ldu2 )
594 u2( nlp1, 1 ) = one
595 IF( m.GT.n ) THEN
596 DO 170 i = 1, nlp1
597 vt( m, i ) = -s*vt( nlp1, i )
598 vt2( 1, i ) = c*vt( nlp1, i )
599 170 CONTINUE
600 DO 180 i = nlp2, m
601 vt2( 1, i ) = s*vt( m, i )
602 vt( m, i ) = c*vt( m, i )
603 180 CONTINUE
604 ELSE
605 CALL dcopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 )
606 END IF
607 IF( m.GT.n ) THEN
608 CALL dcopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 )
609 END IF
610*
611* The deflated singular values and their corresponding vectors go
612* into the back of D, U, and V respectively.
613*
614 IF( n.GT.k ) THEN
615 CALL dcopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 )
616 CALL dlacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),
617 $ ldu )
618 CALL dlacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1,
619 $ 1 ),
620 $ ldvt )
621 END IF
622*
623* Copy CTOT into COLTYP for referencing in DLASD3.
624*
625 DO 190 j = 1, 4
626 coltyp( j ) = ctot( j )
627 190 CONTINUE
628*
629 RETURN
630*
631* End of DLASD2
632*
633 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dcopy(n, dx, incx, dy, incy)
DCOPY
Definition dcopy.f:82
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:101
subroutine dlamrg(n1, n2, a, dtrd1, dtrd2, index)
DLAMRG creates a permutation list to merge the entries of two independently sorted sets into a single...
Definition dlamrg.f:97
subroutine dlasd2(nl, nr, sqre, k, d, z, alpha, beta, u, ldu, vt, ldvt, dsigma, u2, ldu2, vt2, ldvt2, idxp, idx, idxc, idxq, coltyp, info)
DLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.
Definition dlasd2.f:268
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:108
subroutine drot(n, dx, incx, dy, incy, c, s)
DROT
Definition drot.f:92