001:       SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
002:      $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
003:      $                   IDXC, IDXQ, COLTYP, INFO )
004: *
005: *  -- LAPACK auxiliary routine (version 3.2) --
006: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
007: *     November 2006
008: *
009: *     .. Scalar Arguments ..
010:       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
011:       REAL               ALPHA, BETA
012: *     ..
013: *     .. Array Arguments ..
014:       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
015:      $                   IDXQ( * )
016:       REAL               D( * ), DSIGMA( * ), U( LDU, * ),
017:      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
018:      $                   Z( * )
019: *     ..
020: *
021: *  Purpose
022: *  =======
023: *
024: *  SLASD2 merges the two sets of singular values together into a single
025: *  sorted set.  Then it tries to deflate the size of the problem.
026: *  There are two ways in which deflation can occur:  when two or more
027: *  singular values are close together or if there is a tiny entry in the
028: *  Z vector.  For each such occurrence the order of the related secular
029: *  equation problem is reduced by one.
030: *
031: *  SLASD2 is called from SLASD1.
032: *
033: *  Arguments
034: *  =========
035: *
036: *  NL     (input) INTEGER
037: *         The row dimension of the upper block.  NL >= 1.
038: *
039: *  NR     (input) INTEGER
040: *         The row dimension of the lower block.  NR >= 1.
041: *
042: *  SQRE   (input) INTEGER
043: *         = 0: the lower block is an NR-by-NR square matrix.
044: *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
045: *
046: *         The bidiagonal matrix has N = NL + NR + 1 rows and
047: *         M = N + SQRE >= N columns.
048: *
049: *  K      (output) INTEGER
050: *         Contains the dimension of the non-deflated matrix,
051: *         This is the order of the related secular equation. 1 <= K <=N.
052: *
053: *  D      (input/output) REAL array, dimension (N)
054: *         On entry D contains the singular values of the two submatrices
055: *         to be combined.  On exit D contains the trailing (N-K) updated
056: *         singular values (those which were deflated) sorted into
057: *         increasing order.
058: *
059: *  Z      (output) REAL array, dimension (N)
060: *         On exit Z contains the updating row vector in the secular
061: *         equation.
062: *
063: *  ALPHA  (input) REAL
064: *         Contains the diagonal element associated with the added row.
065: *
066: *  BETA   (input) REAL
067: *         Contains the off-diagonal element associated with the added
068: *         row.
069: *
070: *  U      (input/output) REAL array, dimension (LDU,N)
071: *         On entry U contains the left singular vectors of two
072: *         submatrices in the two square blocks with corners at (1,1),
073: *         (NL, NL), and (NL+2, NL+2), (N,N).
074: *         On exit U contains the trailing (N-K) updated left singular
075: *         vectors (those which were deflated) in its last N-K columns.
076: *
077: *  LDU    (input) INTEGER
078: *         The leading dimension of the array U.  LDU >= N.
079: *
080: *  VT     (input/output) REAL array, dimension (LDVT,M)
081: *         On entry VT' contains the right singular vectors of two
082: *         submatrices in the two square blocks with corners at (1,1),
083: *         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
084: *         On exit VT' contains the trailing (N-K) updated right singular
085: *         vectors (those which were deflated) in its last N-K columns.
086: *         In case SQRE =1, the last row of VT spans the right null
087: *         space.
088: *
089: *  LDVT   (input) INTEGER
090: *         The leading dimension of the array VT.  LDVT >= M.
091: *
092: *  DSIGMA (output) REAL array, dimension (N)
093: *         Contains a copy of the diagonal elements (K-1 singular values
094: *         and one zero) in the secular equation.
095: *
096: *  U2     (output) REAL array, dimension (LDU2,N)
097: *         Contains a copy of the first K-1 left singular vectors which
098: *         will be used by SLASD3 in a matrix multiply (SGEMM) to solve
099: *         for the new left singular vectors. U2 is arranged into four
100: *         blocks. The first block contains a column with 1 at NL+1 and
101: *         zero everywhere else; the second block contains non-zero
102: *         entries only at and above NL; the third contains non-zero
103: *         entries only below NL+1; and the fourth is dense.
104: *
105: *  LDU2   (input) INTEGER
106: *         The leading dimension of the array U2.  LDU2 >= N.
107: *
108: *  VT2    (output) REAL array, dimension (LDVT2,N)
109: *         VT2' contains a copy of the first K right singular vectors
110: *         which will be used by SLASD3 in a matrix multiply (SGEMM) to
111: *         solve for the new right singular vectors. VT2 is arranged into
112: *         three blocks. The first block contains a row that corresponds
113: *         to the special 0 diagonal element in SIGMA; the second block
114: *         contains non-zeros only at and before NL +1; the third block
115: *         contains non-zeros only at and after  NL +2.
116: *
117: *  LDVT2  (input) INTEGER
118: *         The leading dimension of the array VT2.  LDVT2 >= M.
119: *
120: *  IDXP   (workspace) INTEGER array, dimension (N)
121: *         This will contain the permutation used to place deflated
122: *         values of D at the end of the array. On output IDXP(2:K)
123: *         points to the nondeflated D-values and IDXP(K+1:N)
124: *         points to the deflated singular values.
125: *
126: *  IDX    (workspace) INTEGER array, dimension (N)
127: *         This will contain the permutation used to sort the contents of
128: *         D into ascending order.
129: *
130: *  IDXC   (output) INTEGER array, dimension (N)
131: *         This will contain the permutation used to arrange the columns
132: *         of the deflated U matrix into three groups:  the first group
133: *         contains non-zero entries only at and above NL, the second
134: *         contains non-zero entries only below NL+2, and the third is
135: *         dense.
136: *
137: *  IDXQ   (input/output) INTEGER array, dimension (N)
138: *         This contains the permutation which separately sorts the two
139: *         sub-problems in D into ascending order.  Note that entries in
140: *         the first hlaf of this permutation must first be moved one
141: *         position backward; and entries in the second half
142: *         must first have NL+1 added to their values.
143: *
144: *  COLTYP (workspace/output) INTEGER array, dimension (N)
145: *         As workspace, this will contain a label which will indicate
146: *         which of the following types a column in the U2 matrix or a
147: *         row in the VT2 matrix is:
148: *         1 : non-zero in the upper half only
149: *         2 : non-zero in the lower half only
150: *         3 : dense
151: *         4 : deflated
152: *
153: *         On exit, it is an array of dimension 4, with COLTYP(I) being
154: *         the dimension of the I-th type columns.
155: *
156: *  INFO   (output) INTEGER
157: *          = 0:  successful exit.
158: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
159: *
160: *  Further Details
161: *  ===============
162: *
163: *  Based on contributions by
164: *     Ming Gu and Huan Ren, Computer Science Division, University of
165: *     California at Berkeley, USA
166: *
167: *  =====================================================================
168: *
169: *     .. Parameters ..
170:       REAL               ZERO, ONE, TWO, EIGHT
171:       PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
172:      $                   EIGHT = 8.0E+0 )
173: *     ..
174: *     .. Local Arrays ..
175:       INTEGER            CTOT( 4 ), PSM( 4 )
176: *     ..
177: *     .. Local Scalars ..
178:       INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
179:      $                   N, NLP1, NLP2
180:       REAL               C, EPS, HLFTOL, S, TAU, TOL, Z1
181: *     ..
182: *     .. External Functions ..
183:       REAL               SLAMCH, SLAPY2
184:       EXTERNAL           SLAMCH, SLAPY2
185: *     ..
186: *     .. External Subroutines ..
187:       EXTERNAL           SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA
188: *     ..
189: *     .. Intrinsic Functions ..
190:       INTRINSIC          ABS, MAX
191: *     ..
192: *     .. Executable Statements ..
193: *
194: *     Test the input parameters.
195: *
196:       INFO = 0
197: *
198:       IF( NL.LT.1 ) THEN
199:          INFO = -1
200:       ELSE IF( NR.LT.1 ) THEN
201:          INFO = -2
202:       ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
203:          INFO = -3
204:       END IF
205: *
206:       N = NL + NR + 1
207:       M = N + SQRE
208: *
209:       IF( LDU.LT.N ) THEN
210:          INFO = -10
211:       ELSE IF( LDVT.LT.M ) THEN
212:          INFO = -12
213:       ELSE IF( LDU2.LT.N ) THEN
214:          INFO = -15
215:       ELSE IF( LDVT2.LT.M ) THEN
216:          INFO = -17
217:       END IF
218:       IF( INFO.NE.0 ) THEN
219:          CALL XERBLA( 'SLASD2', -INFO )
220:          RETURN
221:       END IF
222: *
223:       NLP1 = NL + 1
224:       NLP2 = NL + 2
225: *
226: *     Generate the first part of the vector Z; and move the singular
227: *     values in the first part of D one position backward.
228: *
229:       Z1 = ALPHA*VT( NLP1, NLP1 )
230:       Z( 1 ) = Z1
231:       DO 10 I = NL, 1, -1
232:          Z( I+1 ) = ALPHA*VT( I, NLP1 )
233:          D( I+1 ) = D( I )
234:          IDXQ( I+1 ) = IDXQ( I ) + 1
235:    10 CONTINUE
236: *
237: *     Generate the second part of the vector Z.
238: *
239:       DO 20 I = NLP2, M
240:          Z( I ) = BETA*VT( I, NLP2 )
241:    20 CONTINUE
242: *
243: *     Initialize some reference arrays.
244: *
245:       DO 30 I = 2, NLP1
246:          COLTYP( I ) = 1
247:    30 CONTINUE
248:       DO 40 I = NLP2, N
249:          COLTYP( I ) = 2
250:    40 CONTINUE
251: *
252: *     Sort the singular values into increasing order
253: *
254:       DO 50 I = NLP2, N
255:          IDXQ( I ) = IDXQ( I ) + NLP1
256:    50 CONTINUE
257: *
258: *     DSIGMA, IDXC, IDXC, and the first column of U2
259: *     are used as storage space.
260: *
261:       DO 60 I = 2, N
262:          DSIGMA( I ) = D( IDXQ( I ) )
263:          U2( I, 1 ) = Z( IDXQ( I ) )
264:          IDXC( I ) = COLTYP( IDXQ( I ) )
265:    60 CONTINUE
266: *
267:       CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
268: *
269:       DO 70 I = 2, N
270:          IDXI = 1 + IDX( I )
271:          D( I ) = DSIGMA( IDXI )
272:          Z( I ) = U2( IDXI, 1 )
273:          COLTYP( I ) = IDXC( IDXI )
274:    70 CONTINUE
275: *
276: *     Calculate the allowable deflation tolerance
277: *
278:       EPS = SLAMCH( 'Epsilon' )
279:       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
280:       TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
281: *
282: *     There are 2 kinds of deflation -- first a value in the z-vector
283: *     is small, second two (or more) singular values are very close
284: *     together (their difference is small).
285: *
286: *     If the value in the z-vector is small, we simply permute the
287: *     array so that the corresponding singular value is moved to the
288: *     end.
289: *
290: *     If two values in the D-vector are close, we perform a two-sided
291: *     rotation designed to make one of the corresponding z-vector
292: *     entries zero, and then permute the array so that the deflated
293: *     singular value is moved to the end.
294: *
295: *     If there are multiple singular values then the problem deflates.
296: *     Here the number of equal singular values are found.  As each equal
297: *     singular value is found, an elementary reflector is computed to
298: *     rotate the corresponding singular subspace so that the
299: *     corresponding components of Z are zero in this new basis.
300: *
301:       K = 1
302:       K2 = N + 1
303:       DO 80 J = 2, N
304:          IF( ABS( Z( J ) ).LE.TOL ) THEN
305: *
306: *           Deflate due to small z component.
307: *
308:             K2 = K2 - 1
309:             IDXP( K2 ) = J
310:             COLTYP( J ) = 4
311:             IF( J.EQ.N )
312:      $         GO TO 120
313:          ELSE
314:             JPREV = J
315:             GO TO 90
316:          END IF
317:    80 CONTINUE
318:    90 CONTINUE
319:       J = JPREV
320:   100 CONTINUE
321:       J = J + 1
322:       IF( J.GT.N )
323:      $   GO TO 110
324:       IF( ABS( Z( J ) ).LE.TOL ) THEN
325: *
326: *        Deflate due to small z component.
327: *
328:          K2 = K2 - 1
329:          IDXP( K2 ) = J
330:          COLTYP( J ) = 4
331:       ELSE
332: *
333: *        Check if singular values are close enough to allow deflation.
334: *
335:          IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
336: *
337: *           Deflation is possible.
338: *
339:             S = Z( JPREV )
340:             C = Z( J )
341: *
342: *           Find sqrt(a**2+b**2) without overflow or
343: *           destructive underflow.
344: *
345:             TAU = SLAPY2( C, S )
346:             C = C / TAU
347:             S = -S / TAU
348:             Z( J ) = TAU
349:             Z( JPREV ) = ZERO
350: *
351: *           Apply back the Givens rotation to the left and right
352: *           singular vector matrices.
353: *
354:             IDXJP = IDXQ( IDX( JPREV )+1 )
355:             IDXJ = IDXQ( IDX( J )+1 )
356:             IF( IDXJP.LE.NLP1 ) THEN
357:                IDXJP = IDXJP - 1
358:             END IF
359:             IF( IDXJ.LE.NLP1 ) THEN
360:                IDXJ = IDXJ - 1
361:             END IF
362:             CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
363:             CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
364:      $                 S )
365:             IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
366:                COLTYP( J ) = 3
367:             END IF
368:             COLTYP( JPREV ) = 4
369:             K2 = K2 - 1
370:             IDXP( K2 ) = JPREV
371:             JPREV = J
372:          ELSE
373:             K = K + 1
374:             U2( K, 1 ) = Z( JPREV )
375:             DSIGMA( K ) = D( JPREV )
376:             IDXP( K ) = JPREV
377:             JPREV = J
378:          END IF
379:       END IF
380:       GO TO 100
381:   110 CONTINUE
382: *
383: *     Record the last singular value.
384: *
385:       K = K + 1
386:       U2( K, 1 ) = Z( JPREV )
387:       DSIGMA( K ) = D( JPREV )
388:       IDXP( K ) = JPREV
389: *
390:   120 CONTINUE
391: *
392: *     Count up the total number of the various types of columns, then
393: *     form a permutation which positions the four column types into
394: *     four groups of uniform structure (although one or more of these
395: *     groups may be empty).
396: *
397:       DO 130 J = 1, 4
398:          CTOT( J ) = 0
399:   130 CONTINUE
400:       DO 140 J = 2, N
401:          CT = COLTYP( J )
402:          CTOT( CT ) = CTOT( CT ) + 1
403:   140 CONTINUE
404: *
405: *     PSM(*) = Position in SubMatrix (of types 1 through 4)
406: *
407:       PSM( 1 ) = 2
408:       PSM( 2 ) = 2 + CTOT( 1 )
409:       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
410:       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
411: *
412: *     Fill out the IDXC array so that the permutation which it induces
413: *     will place all type-1 columns first, all type-2 columns next,
414: *     then all type-3's, and finally all type-4's, starting from the
415: *     second column. This applies similarly to the rows of VT.
416: *
417:       DO 150 J = 2, N
418:          JP = IDXP( J )
419:          CT = COLTYP( JP )
420:          IDXC( PSM( CT ) ) = J
421:          PSM( CT ) = PSM( CT ) + 1
422:   150 CONTINUE
423: *
424: *     Sort the singular values and corresponding singular vectors into
425: *     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
426: *     which were not deflated go into the first K slots of DSIGMA, U2,
427: *     and VT2 respectively, while those which were deflated go into the
428: *     last N - K slots, except that the first column/row will be treated
429: *     separately.
430: *
431:       DO 160 J = 2, N
432:          JP = IDXP( J )
433:          DSIGMA( J ) = D( JP )
434:          IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
435:          IF( IDXJ.LE.NLP1 ) THEN
436:             IDXJ = IDXJ - 1
437:          END IF
438:          CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
439:          CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
440:   160 CONTINUE
441: *
442: *     Determine DSIGMA(1), DSIGMA(2) and Z(1)
443: *
444:       DSIGMA( 1 ) = ZERO
445:       HLFTOL = TOL / TWO
446:       IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
447:      $   DSIGMA( 2 ) = HLFTOL
448:       IF( M.GT.N ) THEN
449:          Z( 1 ) = SLAPY2( Z1, Z( M ) )
450:          IF( Z( 1 ).LE.TOL ) THEN
451:             C = ONE
452:             S = ZERO
453:             Z( 1 ) = TOL
454:          ELSE
455:             C = Z1 / Z( 1 )
456:             S = Z( M ) / Z( 1 )
457:          END IF
458:       ELSE
459:          IF( ABS( Z1 ).LE.TOL ) THEN
460:             Z( 1 ) = TOL
461:          ELSE
462:             Z( 1 ) = Z1
463:          END IF
464:       END IF
465: *
466: *     Move the rest of the updating row to Z.
467: *
468:       CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
469: *
470: *     Determine the first column of U2, the first row of VT2 and the
471: *     last row of VT.
472: *
473:       CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
474:       U2( NLP1, 1 ) = ONE
475:       IF( M.GT.N ) THEN
476:          DO 170 I = 1, NLP1
477:             VT( M, I ) = -S*VT( NLP1, I )
478:             VT2( 1, I ) = C*VT( NLP1, I )
479:   170    CONTINUE
480:          DO 180 I = NLP2, M
481:             VT2( 1, I ) = S*VT( M, I )
482:             VT( M, I ) = C*VT( M, I )
483:   180    CONTINUE
484:       ELSE
485:          CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
486:       END IF
487:       IF( M.GT.N ) THEN
488:          CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
489:       END IF
490: *
491: *     The deflated singular values and their corresponding vectors go
492: *     into the back of D, U, and V respectively.
493: *
494:       IF( N.GT.K ) THEN
495:          CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
496:          CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
497:      $                LDU )
498:          CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
499:      $                LDVT )
500:       END IF
501: *
502: *     Copy CTOT into COLTYP for referencing in SLASD3.
503: *
504:       DO 190 J = 1, 4
505:          COLTYP( J ) = CTOT( J )
506:   190 CONTINUE
507: *
508:       RETURN
509: *
510: *     End of SLASD2
511: *
512:       END
513: