001:       SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
002:      $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
003:      $                   IDXC, IDXQ, COLTYP, INFO )
004: *
005: *  -- LAPACK auxiliary routine (version 3.2) --
006: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
007: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
008: *     November 2006
009: *
010: *     .. Scalar Arguments ..
011:       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
012:       DOUBLE PRECISION   ALPHA, BETA
013: *     ..
014: *     .. Array Arguments ..
015:       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
016:      $                   IDXQ( * )
017:       DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),
018:      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
019:      $                   Z( * )
020: *     ..
021: *
022: *  Purpose
023: *  =======
024: *
025: *  DLASD2 merges the two sets of singular values together into a single
026: *  sorted set.  Then it tries to deflate the size of the problem.
027: *  There are two ways in which deflation can occur:  when two or more
028: *  singular values are close together or if there is a tiny entry in the
029: *  Z vector.  For each such occurrence the order of the related secular
030: *  equation problem is reduced by one.
031: *
032: *  DLASD2 is called from DLASD1.
033: *
034: *  Arguments
035: *  =========
036: *
037: *  NL     (input) INTEGER
038: *         The row dimension of the upper block.  NL >= 1.
039: *
040: *  NR     (input) INTEGER
041: *         The row dimension of the lower block.  NR >= 1.
042: *
043: *  SQRE   (input) INTEGER
044: *         = 0: the lower block is an NR-by-NR square matrix.
045: *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
046: *
047: *         The bidiagonal matrix has N = NL + NR + 1 rows and
048: *         M = N + SQRE >= N columns.
049: *
050: *  K      (output) INTEGER
051: *         Contains the dimension of the non-deflated matrix,
052: *         This is the order of the related secular equation. 1 <= K <=N.
053: *
054: *  D      (input/output) DOUBLE PRECISION array, dimension(N)
055: *         On entry D contains the singular values of the two submatrices
056: *         to be combined.  On exit D contains the trailing (N-K) updated
057: *         singular values (those which were deflated) sorted into
058: *         increasing order.
059: *
060: *  Z      (output) DOUBLE PRECISION array, dimension(N)
061: *         On exit Z contains the updating row vector in the secular
062: *         equation.
063: *
064: *  ALPHA  (input) DOUBLE PRECISION
065: *         Contains the diagonal element associated with the added row.
066: *
067: *  BETA   (input) DOUBLE PRECISION
068: *         Contains the off-diagonal element associated with the added
069: *         row.
070: *
071: *  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
072: *         On entry U contains the left singular vectors of two
073: *         submatrices in the two square blocks with corners at (1,1),
074: *         (NL, NL), and (NL+2, NL+2), (N,N).
075: *         On exit U contains the trailing (N-K) updated left singular
076: *         vectors (those which were deflated) in its last N-K columns.
077: *
078: *  LDU    (input) INTEGER
079: *         The leading dimension of the array U.  LDU >= N.
080: *
081: *  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
082: *         On entry VT' contains the right singular vectors of two
083: *         submatrices in the two square blocks with corners at (1,1),
084: *         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
085: *         On exit VT' contains the trailing (N-K) updated right singular
086: *         vectors (those which were deflated) in its last N-K columns.
087: *         In case SQRE =1, the last row of VT spans the right null
088: *         space.
089: *
090: *  LDVT   (input) INTEGER
091: *         The leading dimension of the array VT.  LDVT >= M.
092: *
093: *  DSIGMA (output) DOUBLE PRECISION array, dimension (N)
094: *         Contains a copy of the diagonal elements (K-1 singular values
095: *         and one zero) in the secular equation.
096: *
097: *  U2     (output) DOUBLE PRECISION array, dimension(LDU2,N)
098: *         Contains a copy of the first K-1 left singular vectors which
099: *         will be used by DLASD3 in a matrix multiply (DGEMM) to solve
100: *         for the new left singular vectors. U2 is arranged into four
101: *         blocks. The first block contains a column with 1 at NL+1 and
102: *         zero everywhere else; the second block contains non-zero
103: *         entries only at and above NL; the third contains non-zero
104: *         entries only below NL+1; and the fourth is dense.
105: *
106: *  LDU2   (input) INTEGER
107: *         The leading dimension of the array U2.  LDU2 >= N.
108: *
109: *  VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,N)
110: *         VT2' contains a copy of the first K right singular vectors
111: *         which will be used by DLASD3 in a matrix multiply (DGEMM) to
112: *         solve for the new right singular vectors. VT2 is arranged into
113: *         three blocks. The first block contains a row that corresponds
114: *         to the special 0 diagonal element in SIGMA; the second block
115: *         contains non-zeros only at and before NL +1; the third block
116: *         contains non-zeros only at and after  NL +2.
117: *
118: *  LDVT2  (input) INTEGER
119: *         The leading dimension of the array VT2.  LDVT2 >= M.
120: *
121: *  IDXP   (workspace) INTEGER array dimension(N)
122: *         This will contain the permutation used to place deflated
123: *         values of D at the end of the array. On output IDXP(2:K)
124: *         points to the nondeflated D-values and IDXP(K+1:N)
125: *         points to the deflated singular values.
126: *
127: *  IDX    (workspace) INTEGER array dimension(N)
128: *         This will contain the permutation used to sort the contents of
129: *         D into ascending order.
130: *
131: *  IDXC   (output) INTEGER array dimension(N)
132: *         This will contain the permutation used to arrange the columns
133: *         of the deflated U matrix into three groups:  the first group
134: *         contains non-zero entries only at and above NL, the second
135: *         contains non-zero entries only below NL+2, and the third is
136: *         dense.
137: *
138: *  IDXQ   (input/output) INTEGER array dimension(N)
139: *         This contains the permutation which separately sorts the two
140: *         sub-problems in D into ascending order.  Note that entries in
141: *         the first hlaf of this permutation must first be moved one
142: *         position backward; and entries in the second half
143: *         must first have NL+1 added to their values.
144: *
145: *  COLTYP (workspace/output) INTEGER array dimension(N)
146: *         As workspace, this will contain a label which will indicate
147: *         which of the following types a column in the U2 matrix or a
148: *         row in the VT2 matrix is:
149: *         1 : non-zero in the upper half only
150: *         2 : non-zero in the lower half only
151: *         3 : dense
152: *         4 : deflated
153: *
154: *         On exit, it is an array of dimension 4, with COLTYP(I) being
155: *         the dimension of the I-th type columns.
156: *
157: *  INFO   (output) INTEGER
158: *          = 0:  successful exit.
159: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
160: *
161: *  Further Details
162: *  ===============
163: *
164: *  Based on contributions by
165: *     Ming Gu and Huan Ren, Computer Science Division, University of
166: *     California at Berkeley, USA
167: *
168: *  =====================================================================
169: *
170: *     .. Parameters ..
171:       DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
172:       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
173:      $                   EIGHT = 8.0D+0 )
174: *     ..
175: *     .. Local Arrays ..
176:       INTEGER            CTOT( 4 ), PSM( 4 )
177: *     ..
178: *     .. Local Scalars ..
179:       INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
180:      $                   N, NLP1, NLP2
181:       DOUBLE PRECISION   C, EPS, HLFTOL, S, TAU, TOL, Z1
182: *     ..
183: *     .. External Functions ..
184:       DOUBLE PRECISION   DLAMCH, DLAPY2
185:       EXTERNAL           DLAMCH, DLAPY2
186: *     ..
187: *     .. External Subroutines ..
188:       EXTERNAL           DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
189: *     ..
190: *     .. Intrinsic Functions ..
191:       INTRINSIC          ABS, MAX
192: *     ..
193: *     .. Executable Statements ..
194: *
195: *     Test the input parameters.
196: *
197:       INFO = 0
198: *
199:       IF( NL.LT.1 ) THEN
200:          INFO = -1
201:       ELSE IF( NR.LT.1 ) THEN
202:          INFO = -2
203:       ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
204:          INFO = -3
205:       END IF
206: *
207:       N = NL + NR + 1
208:       M = N + SQRE
209: *
210:       IF( LDU.LT.N ) THEN
211:          INFO = -10
212:       ELSE IF( LDVT.LT.M ) THEN
213:          INFO = -12
214:       ELSE IF( LDU2.LT.N ) THEN
215:          INFO = -15
216:       ELSE IF( LDVT2.LT.M ) THEN
217:          INFO = -17
218:       END IF
219:       IF( INFO.NE.0 ) THEN
220:          CALL XERBLA( 'DLASD2', -INFO )
221:          RETURN
222:       END IF
223: *
224:       NLP1 = NL + 1
225:       NLP2 = NL + 2
226: *
227: *     Generate the first part of the vector Z; and move the singular
228: *     values in the first part of D one position backward.
229: *
230:       Z1 = ALPHA*VT( NLP1, NLP1 )
231:       Z( 1 ) = Z1
232:       DO 10 I = NL, 1, -1
233:          Z( I+1 ) = ALPHA*VT( I, NLP1 )
234:          D( I+1 ) = D( I )
235:          IDXQ( I+1 ) = IDXQ( I ) + 1
236:    10 CONTINUE
237: *
238: *     Generate the second part of the vector Z.
239: *
240:       DO 20 I = NLP2, M
241:          Z( I ) = BETA*VT( I, NLP2 )
242:    20 CONTINUE
243: *
244: *     Initialize some reference arrays.
245: *
246:       DO 30 I = 2, NLP1
247:          COLTYP( I ) = 1
248:    30 CONTINUE
249:       DO 40 I = NLP2, N
250:          COLTYP( I ) = 2
251:    40 CONTINUE
252: *
253: *     Sort the singular values into increasing order
254: *
255:       DO 50 I = NLP2, N
256:          IDXQ( I ) = IDXQ( I ) + NLP1
257:    50 CONTINUE
258: *
259: *     DSIGMA, IDXC, IDXC, and the first column of U2
260: *     are used as storage space.
261: *
262:       DO 60 I = 2, N
263:          DSIGMA( I ) = D( IDXQ( I ) )
264:          U2( I, 1 ) = Z( IDXQ( I ) )
265:          IDXC( I ) = COLTYP( IDXQ( I ) )
266:    60 CONTINUE
267: *
268:       CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
269: *
270:       DO 70 I = 2, N
271:          IDXI = 1 + IDX( I )
272:          D( I ) = DSIGMA( IDXI )
273:          Z( I ) = U2( IDXI, 1 )
274:          COLTYP( I ) = IDXC( IDXI )
275:    70 CONTINUE
276: *
277: *     Calculate the allowable deflation tolerance
278: *
279:       EPS = DLAMCH( 'Epsilon' )
280:       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
281:       TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
282: *
283: *     There are 2 kinds of deflation -- first a value in the z-vector
284: *     is small, second two (or more) singular values are very close
285: *     together (their difference is small).
286: *
287: *     If the value in the z-vector is small, we simply permute the
288: *     array so that the corresponding singular value is moved to the
289: *     end.
290: *
291: *     If two values in the D-vector are close, we perform a two-sided
292: *     rotation designed to make one of the corresponding z-vector
293: *     entries zero, and then permute the array so that the deflated
294: *     singular value is moved to the end.
295: *
296: *     If there are multiple singular values then the problem deflates.
297: *     Here the number of equal singular values are found.  As each equal
298: *     singular value is found, an elementary reflector is computed to
299: *     rotate the corresponding singular subspace so that the
300: *     corresponding components of Z are zero in this new basis.
301: *
302:       K = 1
303:       K2 = N + 1
304:       DO 80 J = 2, N
305:          IF( ABS( Z( J ) ).LE.TOL ) THEN
306: *
307: *           Deflate due to small z component.
308: *
309:             K2 = K2 - 1
310:             IDXP( K2 ) = J
311:             COLTYP( J ) = 4
312:             IF( J.EQ.N )
313:      $         GO TO 120
314:          ELSE
315:             JPREV = J
316:             GO TO 90
317:          END IF
318:    80 CONTINUE
319:    90 CONTINUE
320:       J = JPREV
321:   100 CONTINUE
322:       J = J + 1
323:       IF( J.GT.N )
324:      $   GO TO 110
325:       IF( ABS( Z( J ) ).LE.TOL ) THEN
326: *
327: *        Deflate due to small z component.
328: *
329:          K2 = K2 - 1
330:          IDXP( K2 ) = J
331:          COLTYP( J ) = 4
332:       ELSE
333: *
334: *        Check if singular values are close enough to allow deflation.
335: *
336:          IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
337: *
338: *           Deflation is possible.
339: *
340:             S = Z( JPREV )
341:             C = Z( J )
342: *
343: *           Find sqrt(a**2+b**2) without overflow or
344: *           destructive underflow.
345: *
346:             TAU = DLAPY2( C, S )
347:             C = C / TAU
348:             S = -S / TAU
349:             Z( J ) = TAU
350:             Z( JPREV ) = ZERO
351: *
352: *           Apply back the Givens rotation to the left and right
353: *           singular vector matrices.
354: *
355:             IDXJP = IDXQ( IDX( JPREV )+1 )
356:             IDXJ = IDXQ( IDX( J )+1 )
357:             IF( IDXJP.LE.NLP1 ) THEN
358:                IDXJP = IDXJP - 1
359:             END IF
360:             IF( IDXJ.LE.NLP1 ) THEN
361:                IDXJ = IDXJ - 1
362:             END IF
363:             CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
364:             CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
365:      $                 S )
366:             IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
367:                COLTYP( J ) = 3
368:             END IF
369:             COLTYP( JPREV ) = 4
370:             K2 = K2 - 1
371:             IDXP( K2 ) = JPREV
372:             JPREV = J
373:          ELSE
374:             K = K + 1
375:             U2( K, 1 ) = Z( JPREV )
376:             DSIGMA( K ) = D( JPREV )
377:             IDXP( K ) = JPREV
378:             JPREV = J
379:          END IF
380:       END IF
381:       GO TO 100
382:   110 CONTINUE
383: *
384: *     Record the last singular value.
385: *
386:       K = K + 1
387:       U2( K, 1 ) = Z( JPREV )
388:       DSIGMA( K ) = D( JPREV )
389:       IDXP( K ) = JPREV
390: *
391:   120 CONTINUE
392: *
393: *     Count up the total number of the various types of columns, then
394: *     form a permutation which positions the four column types into
395: *     four groups of uniform structure (although one or more of these
396: *     groups may be empty).
397: *
398:       DO 130 J = 1, 4
399:          CTOT( J ) = 0
400:   130 CONTINUE
401:       DO 140 J = 2, N
402:          CT = COLTYP( J )
403:          CTOT( CT ) = CTOT( CT ) + 1
404:   140 CONTINUE
405: *
406: *     PSM(*) = Position in SubMatrix (of types 1 through 4)
407: *
408:       PSM( 1 ) = 2
409:       PSM( 2 ) = 2 + CTOT( 1 )
410:       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
411:       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
412: *
413: *     Fill out the IDXC array so that the permutation which it induces
414: *     will place all type-1 columns first, all type-2 columns next,
415: *     then all type-3's, and finally all type-4's, starting from the
416: *     second column. This applies similarly to the rows of VT.
417: *
418:       DO 150 J = 2, N
419:          JP = IDXP( J )
420:          CT = COLTYP( JP )
421:          IDXC( PSM( CT ) ) = J
422:          PSM( CT ) = PSM( CT ) + 1
423:   150 CONTINUE
424: *
425: *     Sort the singular values and corresponding singular vectors into
426: *     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
427: *     which were not deflated go into the first K slots of DSIGMA, U2,
428: *     and VT2 respectively, while those which were deflated go into the
429: *     last N - K slots, except that the first column/row will be treated
430: *     separately.
431: *
432:       DO 160 J = 2, N
433:          JP = IDXP( J )
434:          DSIGMA( J ) = D( JP )
435:          IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
436:          IF( IDXJ.LE.NLP1 ) THEN
437:             IDXJ = IDXJ - 1
438:          END IF
439:          CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
440:          CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
441:   160 CONTINUE
442: *
443: *     Determine DSIGMA(1), DSIGMA(2) and Z(1)
444: *
445:       DSIGMA( 1 ) = ZERO
446:       HLFTOL = TOL / TWO
447:       IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
448:      $   DSIGMA( 2 ) = HLFTOL
449:       IF( M.GT.N ) THEN
450:          Z( 1 ) = DLAPY2( Z1, Z( M ) )
451:          IF( Z( 1 ).LE.TOL ) THEN
452:             C = ONE
453:             S = ZERO
454:             Z( 1 ) = TOL
455:          ELSE
456:             C = Z1 / Z( 1 )
457:             S = Z( M ) / Z( 1 )
458:          END IF
459:       ELSE
460:          IF( ABS( Z1 ).LE.TOL ) THEN
461:             Z( 1 ) = TOL
462:          ELSE
463:             Z( 1 ) = Z1
464:          END IF
465:       END IF
466: *
467: *     Move the rest of the updating row to Z.
468: *
469:       CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
470: *
471: *     Determine the first column of U2, the first row of VT2 and the
472: *     last row of VT.
473: *
474:       CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
475:       U2( NLP1, 1 ) = ONE
476:       IF( M.GT.N ) THEN
477:          DO 170 I = 1, NLP1
478:             VT( M, I ) = -S*VT( NLP1, I )
479:             VT2( 1, I ) = C*VT( NLP1, I )
480:   170    CONTINUE
481:          DO 180 I = NLP2, M
482:             VT2( 1, I ) = S*VT( M, I )
483:             VT( M, I ) = C*VT( M, I )
484:   180    CONTINUE
485:       ELSE
486:          CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
487:       END IF
488:       IF( M.GT.N ) THEN
489:          CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
490:       END IF
491: *
492: *     The deflated singular values and their corresponding vectors go
493: *     into the back of D, U, and V respectively.
494: *
495:       IF( N.GT.K ) THEN
496:          CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
497:          CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
498:      $                LDU )
499:          CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
500:      $                LDVT )
501:       END IF
502: *
503: *     Copy CTOT into COLTYP for referencing in DLASD3.
504: *
505:       DO 190 J = 1, 4
506:          COLTYP( J ) = CTOT( J )
507:   190 CONTINUE
508: *
509:       RETURN
510: *
511: *     End of DLASD2
512: *
513:       END
514: