LAPACK 3.3.0
|
00001 SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2, 00002 $ LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z, 00003 $ INFO ) 00004 * 00005 * -- LAPACK auxiliary routine (version 3.2.2) -- 00006 * -- LAPACK is a software package provided by Univ. of Tennessee, -- 00007 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- 00008 * June 2010 00009 * 00010 * .. Scalar Arguments .. 00011 INTEGER INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR, 00012 $ SQRE 00013 * .. 00014 * .. Array Arguments .. 00015 INTEGER CTOT( * ), IDXC( * ) 00016 DOUBLE PRECISION D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ), 00017 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ), 00018 $ Z( * ) 00019 * .. 00020 * 00021 * Purpose 00022 * ======= 00023 * 00024 * DLASD3 finds all the square roots of the roots of the secular 00025 * equation, as defined by the values in D and Z. It makes the 00026 * appropriate calls to DLASD4 and then updates the singular 00027 * vectors by matrix multiplication. 00028 * 00029 * This code makes very mild assumptions about floating point 00030 * arithmetic. It will work on machines with a guard digit in 00031 * add/subtract, or on those binary machines without guard digits 00032 * which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. 00033 * It could conceivably fail on hexadecimal or decimal machines 00034 * without guard digits, but we know of none. 00035 * 00036 * DLASD3 is called from DLASD1. 00037 * 00038 * Arguments 00039 * ========= 00040 * 00041 * NL (input) INTEGER 00042 * The row dimension of the upper block. NL >= 1. 00043 * 00044 * NR (input) INTEGER 00045 * The row dimension of the lower block. NR >= 1. 00046 * 00047 * SQRE (input) INTEGER 00048 * = 0: the lower block is an NR-by-NR square matrix. 00049 * = 1: the lower block is an NR-by-(NR+1) rectangular matrix. 00050 * 00051 * The bidiagonal matrix has N = NL + NR + 1 rows and 00052 * M = N + SQRE >= N columns. 00053 * 00054 * K (input) INTEGER 00055 * The size of the secular equation, 1 =< K = < N. 00056 * 00057 * D (output) DOUBLE PRECISION array, dimension(K) 00058 * On exit the square roots of the roots of the secular equation, 00059 * in ascending order. 00060 * 00061 * Q (workspace) DOUBLE PRECISION array, 00062 * dimension at least (LDQ,K). 00063 * 00064 * LDQ (input) INTEGER 00065 * The leading dimension of the array Q. LDQ >= K. 00066 * 00067 * DSIGMA (input) DOUBLE PRECISION array, dimension(K) 00068 * The first K elements of this array contain the old roots 00069 * of the deflated updating problem. These are the poles 00070 * of the secular equation. 00071 * 00072 * U (output) DOUBLE PRECISION array, dimension (LDU, N) 00073 * The last N - K columns of this matrix contain the deflated 00074 * left singular vectors. 00075 * 00076 * LDU (input) INTEGER 00077 * The leading dimension of the array U. LDU >= N. 00078 * 00079 * U2 (input/output) DOUBLE PRECISION array, dimension (LDU2, N) 00080 * The first K columns of this matrix contain the non-deflated 00081 * left singular vectors for the split problem. 00082 * 00083 * LDU2 (input) INTEGER 00084 * The leading dimension of the array U2. LDU2 >= N. 00085 * 00086 * VT (output) DOUBLE PRECISION array, dimension (LDVT, M) 00087 * The last M - K columns of VT' contain the deflated 00088 * right singular vectors. 00089 * 00090 * LDVT (input) INTEGER 00091 * The leading dimension of the array VT. LDVT >= N. 00092 * 00093 * VT2 (input/output) DOUBLE PRECISION array, dimension (LDVT2, N) 00094 * The first K columns of VT2' contain the non-deflated 00095 * right singular vectors for the split problem. 00096 * 00097 * LDVT2 (input) INTEGER 00098 * The leading dimension of the array VT2. LDVT2 >= N. 00099 * 00100 * IDXC (input) INTEGER array, dimension ( N ) 00101 * The permutation used to arrange the columns of U (and rows of 00102 * VT) into three groups: the first group contains non-zero 00103 * entries only at and above (or before) NL +1; the second 00104 * contains non-zero entries only at and below (or after) NL+2; 00105 * and the third is dense. The first column of U and the row of 00106 * VT are treated separately, however. 00107 * 00108 * The rows of the singular vectors found by DLASD4 00109 * must be likewise permuted before the matrix multiplies can 00110 * take place. 00111 * 00112 * CTOT (input) INTEGER array, dimension ( 4 ) 00113 * A count of the total number of the various types of columns 00114 * in U (or rows in VT), as described in IDXC. The fourth column 00115 * type is any column which has been deflated. 00116 * 00117 * Z (input) DOUBLE PRECISION array, dimension (K) 00118 * The first K elements of this array contain the components 00119 * of the deflation-adjusted updating row vector. 00120 * 00121 * INFO (output) INTEGER 00122 * = 0: successful exit. 00123 * < 0: if INFO = -i, the i-th argument had an illegal value. 00124 * > 0: if INFO = 1, a singular value did not converge 00125 * 00126 * Further Details 00127 * =============== 00128 * 00129 * Based on contributions by 00130 * Ming Gu and Huan Ren, Computer Science Division, University of 00131 * California at Berkeley, USA 00132 * 00133 * ===================================================================== 00134 * 00135 * .. Parameters .. 00136 DOUBLE PRECISION ONE, ZERO, NEGONE 00137 PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0, 00138 $ NEGONE = -1.0D+0 ) 00139 * .. 00140 * .. Local Scalars .. 00141 INTEGER CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1 00142 DOUBLE PRECISION RHO, TEMP 00143 * .. 00144 * .. External Functions .. 00145 DOUBLE PRECISION DLAMC3, DNRM2 00146 EXTERNAL DLAMC3, DNRM2 00147 * .. 00148 * .. External Subroutines .. 00149 EXTERNAL DCOPY, DGEMM, DLACPY, DLASCL, DLASD4, XERBLA 00150 * .. 00151 * .. Intrinsic Functions .. 00152 INTRINSIC ABS, SIGN, SQRT 00153 * .. 00154 * .. Executable Statements .. 00155 * 00156 * Test the input parameters. 00157 * 00158 INFO = 0 00159 * 00160 IF( NL.LT.1 ) THEN 00161 INFO = -1 00162 ELSE IF( NR.LT.1 ) THEN 00163 INFO = -2 00164 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN 00165 INFO = -3 00166 END IF 00167 * 00168 N = NL + NR + 1 00169 M = N + SQRE 00170 NLP1 = NL + 1 00171 NLP2 = NL + 2 00172 * 00173 IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN 00174 INFO = -4 00175 ELSE IF( LDQ.LT.K ) THEN 00176 INFO = -7 00177 ELSE IF( LDU.LT.N ) THEN 00178 INFO = -10 00179 ELSE IF( LDU2.LT.N ) THEN 00180 INFO = -12 00181 ELSE IF( LDVT.LT.M ) THEN 00182 INFO = -14 00183 ELSE IF( LDVT2.LT.M ) THEN 00184 INFO = -16 00185 END IF 00186 IF( INFO.NE.0 ) THEN 00187 CALL XERBLA( 'DLASD3', -INFO ) 00188 RETURN 00189 END IF 00190 * 00191 * Quick return if possible 00192 * 00193 IF( K.EQ.1 ) THEN 00194 D( 1 ) = ABS( Z( 1 ) ) 00195 CALL DCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT ) 00196 IF( Z( 1 ).GT.ZERO ) THEN 00197 CALL DCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 ) 00198 ELSE 00199 DO 10 I = 1, N 00200 U( I, 1 ) = -U2( I, 1 ) 00201 10 CONTINUE 00202 END IF 00203 RETURN 00204 END IF 00205 * 00206 * Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can 00207 * be computed with high relative accuracy (barring over/underflow). 00208 * This is a problem on machines without a guard digit in 00209 * add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). 00210 * The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), 00211 * which on any of these machines zeros out the bottommost 00212 * bit of DSIGMA(I) if it is 1; this makes the subsequent 00213 * subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation 00214 * occurs. On binary machines with a guard digit (almost all 00215 * machines) it does not change DSIGMA(I) at all. On hexadecimal 00216 * and decimal machines with a guard digit, it slightly 00217 * changes the bottommost bits of DSIGMA(I). It does not account 00218 * for hexadecimal or decimal machines without guard digits 00219 * (we know of none). We use a subroutine call to compute 00220 * 2*DSIGMA(I) to prevent optimizing compilers from eliminating 00221 * this code. 00222 * 00223 DO 20 I = 1, K 00224 DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I ) 00225 20 CONTINUE 00226 * 00227 * Keep a copy of Z. 00228 * 00229 CALL DCOPY( K, Z, 1, Q, 1 ) 00230 * 00231 * Normalize Z. 00232 * 00233 RHO = DNRM2( K, Z, 1 ) 00234 CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO ) 00235 RHO = RHO*RHO 00236 * 00237 * Find the new singular values. 00238 * 00239 DO 30 J = 1, K 00240 CALL DLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ), 00241 $ VT( 1, J ), INFO ) 00242 * 00243 * If the zero finder fails, the computation is terminated. 00244 * 00245 IF( INFO.NE.0 ) THEN 00246 RETURN 00247 END IF 00248 30 CONTINUE 00249 * 00250 * Compute updated Z. 00251 * 00252 DO 60 I = 1, K 00253 Z( I ) = U( I, K )*VT( I, K ) 00254 DO 40 J = 1, I - 1 00255 Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / 00256 $ ( DSIGMA( I )-DSIGMA( J ) ) / 00257 $ ( DSIGMA( I )+DSIGMA( J ) ) ) 00258 40 CONTINUE 00259 DO 50 J = I, K - 1 00260 Z( I ) = Z( I )*( U( I, J )*VT( I, J ) / 00261 $ ( DSIGMA( I )-DSIGMA( J+1 ) ) / 00262 $ ( DSIGMA( I )+DSIGMA( J+1 ) ) ) 00263 50 CONTINUE 00264 Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) ) 00265 60 CONTINUE 00266 * 00267 * Compute left singular vectors of the modified diagonal matrix, 00268 * and store related information for the right singular vectors. 00269 * 00270 DO 90 I = 1, K 00271 VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I ) 00272 U( 1, I ) = NEGONE 00273 DO 70 J = 2, K 00274 VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I ) 00275 U( J, I ) = DSIGMA( J )*VT( J, I ) 00276 70 CONTINUE 00277 TEMP = DNRM2( K, U( 1, I ), 1 ) 00278 Q( 1, I ) = U( 1, I ) / TEMP 00279 DO 80 J = 2, K 00280 JC = IDXC( J ) 00281 Q( J, I ) = U( JC, I ) / TEMP 00282 80 CONTINUE 00283 90 CONTINUE 00284 * 00285 * Update the left singular vector matrix. 00286 * 00287 IF( K.EQ.2 ) THEN 00288 CALL DGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U, 00289 $ LDU ) 00290 GO TO 100 00291 END IF 00292 IF( CTOT( 1 ).GT.0 ) THEN 00293 CALL DGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2, 00294 $ Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) 00295 IF( CTOT( 3 ).GT.0 ) THEN 00296 KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) 00297 CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), 00298 $ LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU ) 00299 END IF 00300 ELSE IF( CTOT( 3 ).GT.0 ) THEN 00301 KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) 00302 CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ), 00303 $ LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU ) 00304 ELSE 00305 CALL DLACPY( 'F', NL, K, U2, LDU2, U, LDU ) 00306 END IF 00307 CALL DCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU ) 00308 KTEMP = 2 + CTOT( 1 ) 00309 CTEMP = CTOT( 2 ) + CTOT( 3 ) 00310 CALL DGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2, 00311 $ Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU ) 00312 * 00313 * Generate the right singular vectors. 00314 * 00315 100 CONTINUE 00316 DO 120 I = 1, K 00317 TEMP = DNRM2( K, VT( 1, I ), 1 ) 00318 Q( I, 1 ) = VT( 1, I ) / TEMP 00319 DO 110 J = 2, K 00320 JC = IDXC( J ) 00321 Q( I, J ) = VT( JC, I ) / TEMP 00322 110 CONTINUE 00323 120 CONTINUE 00324 * 00325 * Update the right singular vector matrix. 00326 * 00327 IF( K.EQ.2 ) THEN 00328 CALL DGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO, 00329 $ VT, LDVT ) 00330 RETURN 00331 END IF 00332 KTEMP = 1 + CTOT( 1 ) 00333 CALL DGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ, 00334 $ VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT ) 00335 KTEMP = 2 + CTOT( 1 ) + CTOT( 2 ) 00336 IF( KTEMP.LE.LDVT2 ) 00337 $ CALL DGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ), 00338 $ LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ), 00339 $ LDVT ) 00340 * 00341 KTEMP = CTOT( 1 ) + 1 00342 NRP1 = NR + SQRE 00343 IF( KTEMP.GT.1 ) THEN 00344 DO 130 I = 1, K 00345 Q( I, KTEMP ) = Q( I, 1 ) 00346 130 CONTINUE 00347 DO 140 I = NLP2, M 00348 VT2( KTEMP, I ) = VT2( 1, I ) 00349 140 CONTINUE 00350 END IF 00351 CTEMP = 1 + CTOT( 2 ) + CTOT( 3 ) 00352 CALL DGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ, 00353 $ VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT ) 00354 * 00355 RETURN 00356 * 00357 * End of DLASD3 00358 * 00359 END