d1/d83/dlasd2_8f_source.html

00001       SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
00002      $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
00003      $                   IDXC, IDXQ, COLTYP, INFO )
00004 *
00005 *  -- LAPACK auxiliary routine (version 3.2) --
00006 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
00007 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
00008 *     November 2006
00009 *
00010 *     .. Scalar Arguments ..
00011       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
00012       DOUBLE PRECISION   ALPHA, BETA
00013 *     ..
00014 *     .. Array Arguments ..
00015       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
00016      $                   IDXQ( * )
00017       DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),
00018      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
00019      $                   Z( * )
00020 *     ..
00021 *
00022 *  Purpose
00023 *  =======
00024 *
00025 *  DLASD2 merges the two sets of singular values together into a single
00026 *  sorted set.  Then it tries to deflate the size of the problem.
00027 *  There are two ways in which deflation can occur:  when two or more
00028 *  singular values are close together or if there is a tiny entry in the
00029 *  Z vector.  For each such occurrence the order of the related secular
00030 *  equation problem is reduced by one.
00031 *
00032 *  DLASD2 is called from DLASD1.
00033 *
00034 *  Arguments
00035 *  =========
00036 *
00037 *  NL     (input) INTEGER
00038 *         The row dimension of the upper block.  NL >= 1.
00039 *
00040 *  NR     (input) INTEGER
00041 *         The row dimension of the lower block.  NR >= 1.
00042 *
00043 *  SQRE   (input) INTEGER
00044 *         = 0: the lower block is an NR-by-NR square matrix.
00045 *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
00046 *
00047 *         The bidiagonal matrix has N = NL + NR + 1 rows and
00048 *         M = N + SQRE >= N columns.
00049 *
00050 *  K      (output) INTEGER
00051 *         Contains the dimension of the non-deflated matrix,
00052 *         This is the order of the related secular equation. 1 <= K <=N.
00053 *
00054 *  D      (input/output) DOUBLE PRECISION array, dimension(N)
00055 *         On entry D contains the singular values of the two submatrices
00056 *         to be combined.  On exit D contains the trailing (N-K) updated
00057 *         singular values (those which were deflated) sorted into
00058 *         increasing order.
00059 *
00060 *  Z      (output) DOUBLE PRECISION array, dimension(N)
00061 *         On exit Z contains the updating row vector in the secular
00062 *         equation.
00063 *
00064 *  ALPHA  (input) DOUBLE PRECISION
00065 *         Contains the diagonal element associated with the added row.
00066 *
00067 *  BETA   (input) DOUBLE PRECISION
00068 *         Contains the off-diagonal element associated with the added
00069 *         row.
00070 *
00071 *  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
00072 *         On entry U contains the left singular vectors of two
00073 *         submatrices in the two square blocks with corners at (1,1),
00074 *         (NL, NL), and (NL+2, NL+2), (N,N).
00075 *         On exit U contains the trailing (N-K) updated left singular
00076 *         vectors (those which were deflated) in its last N-K columns.
00077 *
00078 *  LDU    (input) INTEGER
00079 *         The leading dimension of the array U.  LDU >= N.
00080 *
00081 *  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
00082 *         On entry VT' contains the right singular vectors of two
00083 *         submatrices in the two square blocks with corners at (1,1),
00084 *         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
00085 *         On exit VT' contains the trailing (N-K) updated right singular
00086 *         vectors (those which were deflated) in its last N-K columns.
00087 *         In case SQRE =1, the last row of VT spans the right null
00088 *         space.
00089 *
00090 *  LDVT   (input) INTEGER
00091 *         The leading dimension of the array VT.  LDVT >= M.
00092 *
00093 *  DSIGMA (output) DOUBLE PRECISION array, dimension (N)
00094 *         Contains a copy of the diagonal elements (K-1 singular values
00095 *         and one zero) in the secular equation.
00096 *
00097 *  U2     (output) DOUBLE PRECISION array, dimension(LDU2,N)
00098 *         Contains a copy of the first K-1 left singular vectors which
00099 *         will be used by DLASD3 in a matrix multiply (DGEMM) to solve
00100 *         for the new left singular vectors. U2 is arranged into four
00101 *         blocks. The first block contains a column with 1 at NL+1 and
00102 *         zero everywhere else; the second block contains non-zero
00103 *         entries only at and above NL; the third contains non-zero
00104 *         entries only below NL+1; and the fourth is dense.
00105 *
00106 *  LDU2   (input) INTEGER
00107 *         The leading dimension of the array U2.  LDU2 >= N.
00108 *
00109 *  VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,N)
00110 *         VT2' contains a copy of the first K right singular vectors
00111 *         which will be used by DLASD3 in a matrix multiply (DGEMM) to
00112 *         solve for the new right singular vectors. VT2 is arranged into
00113 *         three blocks. The first block contains a row that corresponds
00114 *         to the special 0 diagonal element in SIGMA; the second block
00115 *         contains non-zeros only at and before NL +1; the third block
00116 *         contains non-zeros only at and after  NL +2.
00117 *
00118 *  LDVT2  (input) INTEGER
00119 *         The leading dimension of the array VT2.  LDVT2 >= M.
00120 *
00121 *  IDXP   (workspace) INTEGER array dimension(N)
00122 *         This will contain the permutation used to place deflated
00123 *         values of D at the end of the array. On output IDXP(2:K)
00124 *         points to the nondeflated D-values and IDXP(K+1:N)
00125 *         points to the deflated singular values.
00126 *
00127 *  IDX    (workspace) INTEGER array dimension(N)
00128 *         This will contain the permutation used to sort the contents of
00129 *         D into ascending order.
00130 *
00131 *  IDXC   (output) INTEGER array dimension(N)
00132 *         This will contain the permutation used to arrange the columns
00133 *         of the deflated U matrix into three groups:  the first group
00134 *         contains non-zero entries only at and above NL, the second
00135 *         contains non-zero entries only below NL+2, and the third is
00136 *         dense.
00137 *
00138 *  IDXQ   (input/output) INTEGER array dimension(N)
00139 *         This contains the permutation which separately sorts the two
00140 *         sub-problems in D into ascending order.  Note that entries in
00141 *         the first hlaf of this permutation must first be moved one
00142 *         position backward; and entries in the second half
00143 *         must first have NL+1 added to their values.
00144 *
00145 *  COLTYP (workspace/output) INTEGER array dimension(N)
00146 *         As workspace, this will contain a label which will indicate
00147 *         which of the following types a column in the U2 matrix or a
00148 *         row in the VT2 matrix is:
00149 *         1 : non-zero in the upper half only
00150 *         2 : non-zero in the lower half only
00151 *         3 : dense
00152 *         4 : deflated
00153 *
00154 *         On exit, it is an array of dimension 4, with COLTYP(I) being
00155 *         the dimension of the I-th type columns.
00156 *
00157 *  INFO   (output) INTEGER
00158 *          = 0:  successful exit.
00159 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
00160 *
00161 *  Further Details
00162 *  ===============
00163 *
00164 *  Based on contributions by
00165 *     Ming Gu and Huan Ren, Computer Science Division, University of
00166 *     California at Berkeley, USA
00167 *
00168 *  =====================================================================
00169 *
00170 *     .. Parameters ..
00171       DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
00172       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
00173      $                   EIGHT = 8.0D+0 )
00174 *     ..
00175 *     .. Local Arrays ..
00176       INTEGER            CTOT( 4 ), PSM( 4 )
00177 *     ..
00178 *     .. Local Scalars ..
00179       INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
00180      $                   N, NLP1, NLP2
00181       DOUBLE PRECISION   C, EPS, HLFTOL, S, TAU, TOL, Z1
00182 *     ..
00183 *     .. External Functions ..
00184       DOUBLE PRECISION   DLAMCH, DLAPY2
00185       EXTERNAL           DLAMCH, DLAPY2
00186 *     ..
00187 *     .. External Subroutines ..
00188       EXTERNAL           DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
00189 *     ..
00190 *     .. Intrinsic Functions ..
00191       INTRINSIC          ABS, MAX
00192 *     ..
00193 *     .. Executable Statements ..
00194 *
00195 *     Test the input parameters.
00196 *
00197       INFO = 0
00198 *
00199       IF( NL.LT.1 ) THEN
00200          INFO = -1
00201       ELSE IF( NR.LT.1 ) THEN
00202          INFO = -2
00203       ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
00204          INFO = -3
00205       END IF
00206 *
00207       N = NL + NR + 1
00208       M = N + SQRE
00209 *
00210       IF( LDU.LT.N ) THEN
00211          INFO = -10
00212       ELSE IF( LDVT.LT.M ) THEN
00213          INFO = -12
00214       ELSE IF( LDU2.LT.N ) THEN
00215          INFO = -15
00216       ELSE IF( LDVT2.LT.M ) THEN
00217          INFO = -17
00218       END IF
00219       IF( INFO.NE.0 ) THEN
00220          CALL XERBLA( 'DLASD2', -INFO )
00221          RETURN
00222       END IF
00223 *
00224       NLP1 = NL + 1
00225       NLP2 = NL + 2
00226 *
00227 *     Generate the first part of the vector Z; and move the singular
00228 *     values in the first part of D one position backward.
00229 *
00230       Z1 = ALPHA*VT( NLP1, NLP1 )
00231       Z( 1 ) = Z1
00232       DO 10 I = NL, 1, -1
00233          Z( I+1 ) = ALPHA*VT( I, NLP1 )
00234          D( I+1 ) = D( I )
00235          IDXQ( I+1 ) = IDXQ( I ) + 1
00236    10 CONTINUE
00237 *
00238 *     Generate the second part of the vector Z.
00239 *
00240       DO 20 I = NLP2, M
00241          Z( I ) = BETA*VT( I, NLP2 )
00242    20 CONTINUE
00243 *
00244 *     Initialize some reference arrays.
00245 *
00246       DO 30 I = 2, NLP1
00247          COLTYP( I ) = 1
00248    30 CONTINUE
00249       DO 40 I = NLP2, N
00250          COLTYP( I ) = 2
00251    40 CONTINUE
00252 *
00253 *     Sort the singular values into increasing order
00254 *
00255       DO 50 I = NLP2, N
00256          IDXQ( I ) = IDXQ( I ) + NLP1
00257    50 CONTINUE
00258 *
00259 *     DSIGMA, IDXC, IDXC, and the first column of U2
00260 *     are used as storage space.
00261 *
00262       DO 60 I = 2, N
00263          DSIGMA( I ) = D( IDXQ( I ) )
00264          U2( I, 1 ) = Z( IDXQ( I ) )
00265          IDXC( I ) = COLTYP( IDXQ( I ) )
00266    60 CONTINUE
00267 *
00268       CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
00269 *
00270       DO 70 I = 2, N
00271          IDXI = 1 + IDX( I )
00272          D( I ) = DSIGMA( IDXI )
00273          Z( I ) = U2( IDXI, 1 )
00274          COLTYP( I ) = IDXC( IDXI )
00275    70 CONTINUE
00276 *
00277 *     Calculate the allowable deflation tolerance
00278 *
00279       EPS = DLAMCH( 'Epsilon' )
00280       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
00281       TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
00282 *
00283 *     There are 2 kinds of deflation -- first a value in the z-vector
00284 *     is small, second two (or more) singular values are very close
00285 *     together (their difference is small).
00286 *
00287 *     If the value in the z-vector is small, we simply permute the
00288 *     array so that the corresponding singular value is moved to the
00289 *     end.
00290 *
00291 *     If two values in the D-vector are close, we perform a two-sided
00292 *     rotation designed to make one of the corresponding z-vector
00293 *     entries zero, and then permute the array so that the deflated
00294 *     singular value is moved to the end.
00295 *
00296 *     If there are multiple singular values then the problem deflates.
00297 *     Here the number of equal singular values are found.  As each equal
00298 *     singular value is found, an elementary reflector is computed to
00299 *     rotate the corresponding singular subspace so that the
00300 *     corresponding components of Z are zero in this new basis.
00301 *
00302       K = 1
00303       K2 = N + 1
00304       DO 80 J = 2, N
00305          IF( ABS( Z( J ) ).LE.TOL ) THEN
00306 *
00307 *           Deflate due to small z component.
00308 *
00309             K2 = K2 - 1
00310             IDXP( K2 ) = J
00311             COLTYP( J ) = 4
00312             IF( J.EQ.N )
00313      $         GO TO 120
00314          ELSE
00315             JPREV = J
00316             GO TO 90
00317          END IF
00318    80 CONTINUE
00319    90 CONTINUE
00320       J = JPREV
00321   100 CONTINUE
00322       J = J + 1
00323       IF( J.GT.N )
00324      $   GO TO 110
00325       IF( ABS( Z( J ) ).LE.TOL ) THEN
00326 *
00327 *        Deflate due to small z component.
00328 *
00329          K2 = K2 - 1
00330          IDXP( K2 ) = J
00331          COLTYP( J ) = 4
00332       ELSE
00333 *
00334 *        Check if singular values are close enough to allow deflation.
00335 *
00336          IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
00337 *
00338 *           Deflation is possible.
00339 *
00340             S = Z( JPREV )
00341             C = Z( J )
00342 *
00343 *           Find sqrt(a**2+b**2) without overflow or
00344 *           destructive underflow.
00345 *
00346             TAU = DLAPY2( C, S )
00347             C = C / TAU
00348             S = -S / TAU
00349             Z( J ) = TAU
00350             Z( JPREV ) = ZERO
00351 *
00352 *           Apply back the Givens rotation to the left and right
00353 *           singular vector matrices.
00354 *
00355             IDXJP = IDXQ( IDX( JPREV )+1 )
00356             IDXJ = IDXQ( IDX( J )+1 )
00357             IF( IDXJP.LE.NLP1 ) THEN
00358                IDXJP = IDXJP - 1
00359             END IF
00360             IF( IDXJ.LE.NLP1 ) THEN
00361                IDXJ = IDXJ - 1
00362             END IF
00363             CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
00364             CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
00365      $                 S )
00366             IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
00367                COLTYP( J ) = 3
00368             END IF
00369             COLTYP( JPREV ) = 4
00370             K2 = K2 - 1
00371             IDXP( K2 ) = JPREV
00372             JPREV = J
00373          ELSE
00374             K = K + 1
00375             U2( K, 1 ) = Z( JPREV )
00376             DSIGMA( K ) = D( JPREV )
00377             IDXP( K ) = JPREV
00378             JPREV = J
00379          END IF
00380       END IF
00381       GO TO 100
00382   110 CONTINUE
00383 *
00384 *     Record the last singular value.
00385 *
00386       K = K + 1
00387       U2( K, 1 ) = Z( JPREV )
00388       DSIGMA( K ) = D( JPREV )
00389       IDXP( K ) = JPREV
00390 *
00391   120 CONTINUE
00392 *
00393 *     Count up the total number of the various types of columns, then
00394 *     form a permutation which positions the four column types into
00395 *     four groups of uniform structure (although one or more of these
00396 *     groups may be empty).
00397 *
00398       DO 130 J = 1, 4
00399          CTOT( J ) = 0
00400   130 CONTINUE
00401       DO 140 J = 2, N
00402          CT = COLTYP( J )
00403          CTOT( CT ) = CTOT( CT ) + 1
00404   140 CONTINUE
00405 *
00406 *     PSM(*) = Position in SubMatrix (of types 1 through 4)
00407 *
00408       PSM( 1 ) = 2
00409       PSM( 2 ) = 2 + CTOT( 1 )
00410       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
00411       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
00412 *
00413 *     Fill out the IDXC array so that the permutation which it induces
00414 *     will place all type-1 columns first, all type-2 columns next,
00415 *     then all type-3's, and finally all type-4's, starting from the
00416 *     second column. This applies similarly to the rows of VT.
00417 *
00418       DO 150 J = 2, N
00419          JP = IDXP( J )
00420          CT = COLTYP( JP )
00421          IDXC( PSM( CT ) ) = J
00422          PSM( CT ) = PSM( CT ) + 1
00423   150 CONTINUE
00424 *
00425 *     Sort the singular values and corresponding singular vectors into
00426 *     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
00427 *     which were not deflated go into the first K slots of DSIGMA, U2,
00428 *     and VT2 respectively, while those which were deflated go into the
00429 *     last N - K slots, except that the first column/row will be treated
00430 *     separately.
00431 *
00432       DO 160 J = 2, N
00433          JP = IDXP( J )
00434          DSIGMA( J ) = D( JP )
00435          IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
00436          IF( IDXJ.LE.NLP1 ) THEN
00437             IDXJ = IDXJ - 1
00438          END IF
00439          CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
00440          CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
00441   160 CONTINUE
00442 *
00443 *     Determine DSIGMA(1), DSIGMA(2) and Z(1)
00444 *
00445       DSIGMA( 1 ) = ZERO
00446       HLFTOL = TOL / TWO
00447       IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
00448      $   DSIGMA( 2 ) = HLFTOL
00449       IF( M.GT.N ) THEN
00450          Z( 1 ) = DLAPY2( Z1, Z( M ) )
00451          IF( Z( 1 ).LE.TOL ) THEN
00452             C = ONE
00453             S = ZERO
00454             Z( 1 ) = TOL
00455          ELSE
00456             C = Z1 / Z( 1 )
00457             S = Z( M ) / Z( 1 )
00458          END IF
00459       ELSE
00460          IF( ABS( Z1 ).LE.TOL ) THEN
00461             Z( 1 ) = TOL
00462          ELSE
00463             Z( 1 ) = Z1
00464          END IF
00465       END IF
00466 *
00467 *     Move the rest of the updating row to Z.
00468 *
00469       CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
00470 *
00471 *     Determine the first column of U2, the first row of VT2 and the
00472 *     last row of VT.
00473 *
00474       CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
00475       U2( NLP1, 1 ) = ONE
00476       IF( M.GT.N ) THEN
00477          DO 170 I = 1, NLP1
00478             VT( M, I ) = -S*VT( NLP1, I )
00479             VT2( 1, I ) = C*VT( NLP1, I )
00480   170    CONTINUE
00481          DO 180 I = NLP2, M
00482             VT2( 1, I ) = S*VT( M, I )
00483             VT( M, I ) = C*VT( M, I )
00484   180    CONTINUE
00485       ELSE
00486          CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
00487       END IF
00488       IF( M.GT.N ) THEN
00489          CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
00490       END IF
00491 *
00492 *     The deflated singular values and their corresponding vectors go
00493 *     into the back of D, U, and V respectively.
00494 *
00495       IF( N.GT.K ) THEN
00496          CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
00497          CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
00498      $                LDU )
00499          CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
00500      $                LDVT )
00501       END IF
00502 *
00503 *     Copy CTOT into COLTYP for referencing in DLASD3.
00504 *
00505       DO 190 J = 1, 4
00506          COLTYP( J ) = CTOT( J )
00507   190 CONTINUE
00508 *
00509       RETURN
00510 *
00511 *     End of DLASD2
00512 *
00513       END