subroutine dbbcsd	(	character	JOBU1,
		character	JOBU2,
		character	JOBV1T,
		character	JOBV2T,
		character	TRANS,
		integer	M,
		integer	P,
		integer	Q,
		double precision, dimension( * )	THETA,
		double precision, dimension( * )	PHI,
		double precision, dimension( ldu1, * )	U1,
		integer	LDU1,
		double precision, dimension( ldu2, * )	U2,
		integer	LDU2,
		double precision, dimension( ldv1t, * )	V1T,
		integer	LDV1T,
		double precision, dimension( ldv2t, * )	V2T,
		integer	LDV2T,
		double precision, dimension( * )	B11D,
		double precision, dimension( * )	B11E,
		double precision, dimension( * )	B12D,
		double precision, dimension( * )	B12E,
		double precision, dimension( * )	B21D,
		double precision, dimension( * )	B21E,
		double precision, dimension( * )	B22D,
		double precision, dimension( * )	B22E,
		double precision, dimension( * )	WORK,
		integer	LWORK,
		integer	INFO
	)

DBBCSD

Download DBBCSD + dependencies [TGZ] [ZIP] [TXT]

Purpose:

 DBBCSD computes the CS decomposition of an orthogonal matrix in
 bidiagonal-block form,


     [ B11 | B12 0  0 ]
     [  0  |  0 -I  0 ]
 X = [----------------]
     [ B21 | B22 0  0 ]
     [  0  |  0  0  I ]

                               [  C | -S  0  0 ]
                   [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**T
                 = [---------] [---------------] [---------]   .
                   [    | U2 ] [  S |  C  0  0 ] [    | V2 ]
                               [  0 |  0  0  I ]

 X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
 than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
 transposed and/or permuted. This can be done in constant time using
 the TRANS and SIGNS options. See DORCSD for details.)

 The bidiagonal matrices B11, B12, B21, and B22 are represented
 implicitly by angles THETA(1:Q) and PHI(1:Q-1).

 The orthogonal matrices U1, U2, V1T, and V2T are input/output.
 The input matrices are pre- or post-multiplied by the appropriate
 singular vector matrices.

Parameters

[in]	JOBU1	JOBU1 is CHARACTER = 'Y': U1 is updated; otherwise: U1 is not updated.
[in]	JOBU2	JOBU2 is CHARACTER = 'Y': U2 is updated; otherwise: U2 is not updated.
[in]	JOBV1T	JOBV1T is CHARACTER = 'Y': V1T is updated; otherwise: V1T is not updated.
[in]	JOBV2T	JOBV2T is CHARACTER = 'Y': V2T is updated; otherwise: V2T is not updated.
[in]	TRANS	TRANS is CHARACTER = 'T': X, U1, U2, V1T, and V2T are stored in row-major order; otherwise: X, U1, U2, V1T, and V2T are stored in column-major order.
[in]	M	M is INTEGER The number of rows and columns in X, the orthogonal matrix in bidiagonal-block form.
[in]	P	P is INTEGER The number of rows in the top-left block of X. 0 <= P <= M.
[in]	Q	Q is INTEGER The number of columns in the top-left block of X. 0 <= Q <= MIN(P,M-P,M-Q).
[in,out]	THETA	THETA is DOUBLE PRECISION array, dimension (Q) On entry, the angles THETA(1),...,THETA(Q) that, along with PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block form. On exit, the angles whose cosines and sines define the diagonal blocks in the CS decomposition.
[in,out]	PHI	PHI is DOUBLE PRECISION array, dimension (Q-1) The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., THETA(Q), define the matrix in bidiagonal-block form.
[in,out]	U1	U1 is DOUBLE PRECISION array, dimension (LDU1,P) On entry, a P-by-P matrix. On exit, U1 is postmultiplied by the left singular vector matrix common to [ B11 ; 0 ] and [ B12 0 0 ; 0 -I 0 0 ].
[in]	LDU1	LDU1 is INTEGER The leading dimension of the array U1, LDU1 >= MAX(1,P).
[in,out]	U2	U2 is DOUBLE PRECISION array, dimension (LDU2,M-P) On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is postmultiplied by the left singular vector matrix common to [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
[in]	LDU2	LDU2 is INTEGER The leading dimension of the array U2, LDU2 >= MAX(1,M-P).
[in,out]	V1T	V1T is DOUBLE PRECISION array, dimension (LDV1T,Q) On entry, a Q-by-Q matrix. On exit, V1T is premultiplied by the transpose of the right singular vector matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
[in]	LDV1T	LDV1T is INTEGER The leading dimension of the array V1T, LDV1T >= MAX(1,Q).
[in,out]	V2T	V2T is DOUBLE PRECISION array, dimension (LDV2T,M-Q) On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is premultiplied by the transpose of the right singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and [ B22 0 0 ; 0 0 I ].
[in]	LDV2T	LDV2T is INTEGER The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q).
[out]	B11D	B11D is DOUBLE PRECISION array, dimension (Q) When DBBCSD converges, B11D contains the cosines of THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then B11D contains the diagonal of the partially reduced top-left block.
[out]	B11E	B11E is DOUBLE PRECISION array, dimension (Q-1) When DBBCSD converges, B11E contains zeros. If DBBCSD fails to converge, then B11E contains the superdiagonal of the partially reduced top-left block.
[out]	B12D	B12D is DOUBLE PRECISION array, dimension (Q) When DBBCSD converges, B12D contains the negative sines of THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then B12D contains the diagonal of the partially reduced top-right block.
[out]	B12E	B12E is DOUBLE PRECISION array, dimension (Q-1) When DBBCSD converges, B12E contains zeros. If DBBCSD fails to converge, then B12E contains the subdiagonal of the partially reduced top-right block.
[out]	B21D	B21D is DOUBLE PRECISION array, dimension (Q) When DBBCSD converges, B21D contains the negative sines of THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then B21D contains the diagonal of the partially reduced bottom-left block.
[out]	B21E	B21E is DOUBLE PRECISION array, dimension (Q-1) When DBBCSD converges, B21E contains zeros. If DBBCSD fails to converge, then B21E contains the subdiagonal of the partially reduced bottom-left block.
[out]	B22D	B22D is DOUBLE PRECISION array, dimension (Q) When DBBCSD converges, B22D contains the negative sines of THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then B22D contains the diagonal of the partially reduced bottom-right block.
[out]	B22E	B22E is DOUBLE PRECISION array, dimension (Q-1) When DBBCSD converges, B22E contains zeros. If DBBCSD fails to converge, then B22E contains the subdiagonal of the partially reduced bottom-right block.
[out]	WORK	WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]	LWORK	LWORK is INTEGER The dimension of the array WORK. LWORK >= MAX(1,8*Q). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the work array, and no error message related to LWORK is issued by XERBLA.
[out]	INFO	INFO is INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if DBBCSD did not converge, INFO specifies the number of nonzero entries in PHI, and B11D, B11E, etc., contain the partially reduced matrix.

Internal Parameters:

  TOLMUL  DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
          TOLMUL controls the convergence criterion of the QR loop.
          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
          are within TOLMUL*EPS of either bound.

References: [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. Algorithms, 50(1):33-65, 2009.

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Date: June 2016

Definition at line 334 of file dbbcsd.f.

 *
 *  -- LAPACK computational routine (version 3.6.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     June 2016
 *
 *     .. Scalar Arguments ..
 *     Job flags, storage-order flag, problem sizes, leading
 *     dimensions, workspace length and the status code.
       CHARACTER          jobu1, jobu2, jobv1t, jobv2t, trans
       INTEGER            info, ldu1, ldu2, ldv1t, ldv2t, lwork, m, p, q
 *     ..
 *     .. Array Arguments ..
 *     Diagonal (D) and off-diagonal (E) entries of the four blocks,
 *     the angle arrays, and the workspace.
       DOUBLE PRECISION   b11d( * ), b11e( * ), b12d( * ), b12e( * ),
      $                   b21d( * ), b21e( * ), b22d( * ), b22e( * ),
      $                   phi( * ), theta( * ), work( * )
 *     Matrices that receive the accumulated rotations.
       DOUBLE PRECISION   u1( ldu1, * ), u2( ldu2, * ), v1t( ldv1t, * ),
      $                   v2t( ldv2t, * )
 *     ..
 *
 *  ===================================================================
 *
 *     .. Parameters ..
 *     MAXITR scales the iteration budget (MAXIT = MAXITR*Q*Q) and
 *     also enters the underflow-based part of THRESH.
       INTEGER            maxitr
       parameter                ( maxitr = 6 )
 *     MEIGHTH is the exponent applied to EPS when forming TOLMUL;
 *     PIOVER2 is the upper bound used when rounding angles.
       DOUBLE PRECISION   hundred, meighth, one, piover2, ten, zero
       parameter                ( hundred = 100.0d0, meighth = -0.125d0,
      $                     one = 1.0d0, piover2 = 1.57079632679489662d0,
      $                     ten = 10.0d0, zero = 0.0d0 )
 *     NEGONE is the factor passed to DSCAL for sign flips.
       DOUBLE PRECISION   negone
       parameter                ( negone = -1.0d0 )
 *     ..
 *     .. Local Scalars ..
 *     WANT* mirror the JOB* flags; COLMAJOR is .NOT. (TRANS = 'T');
 *     RESTARTxy flag a negligible bulge in block Bxy.
       LOGICAL            colmajor, lquery, restart11, restart12,
      $                   restart21, restart22, wantu1, wantu2, wantv1t,
      $                   wantv2t
 *     IMIN..IMAX delimit the active window; I*CS / I*SN are offsets
 *     into WORK (Q entries each) holding rotation cosines and sines.
       INTEGER            i, imin, imax, iter, iu1cs, iu1sn, iu2cs,
      $                   iu2sn, iv1tcs, iv1tsn, iv2tcs, iv2tsn, j,
      $                   lworkmin, lworkopt, maxit, mini
 *     MU and NU are the shifts for B11 and B21; Bxy BULGE hold the
 *     temporary out-of-band entries created by each rotation.
       DOUBLE PRECISION   b11bulge, b12bulge, b21bulge, b22bulge, dummy,
      $                   eps, mu, nu, r, sigma11, sigma21,
      $                   temp, thetamax, thetamin, thresh, tol, tolmul,
      $                   unfl, x1, x2, y1, y2
 *
 *     .. External Subroutines ..
       EXTERNAL           dlasr, dscal, dswap, dlartgp, dlartgs, dlas2,
      $                   xerbla
 *     ..
 *     .. External Functions ..
       DOUBLE PRECISION   dlamch
       LOGICAL            lsame
       EXTERNAL           lsame, dlamch
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          abs, atan2, cos, max, min, sin, sqrt
 *     ..
 *     .. Executable Statements ..
 *
 *     Test input arguments
 *
       info = 0
       lquery = lwork .EQ. -1
       wantu1 = lsame( jobu1, 'Y' )
       wantu2 = lsame( jobu2, 'Y' )
       wantv1t = lsame( jobv1t, 'Y' )
       wantv2t = lsame( jobv2t, 'Y' )
       colmajor = .NOT. lsame( trans, 'T' )
 *
       IF( m .LT. 0 ) THEN
          info = -6
       ELSE IF( p .LT. 0 .OR. p .GT. m ) THEN
          info = -7
       ELSE IF( q .LT. 0 .OR. q .GT. m ) THEN
          info = -8
       ELSE IF( q .GT. p .OR. q .GT. m-p .OR. q .GT. m-q ) THEN
          info = -8
       ELSE IF( wantu1 .AND. ldu1 .LT. p ) THEN
          info = -12
       ELSE IF( wantu2 .AND. ldu2 .LT. m-p ) THEN
          info = -14
       ELSE IF( wantv1t .AND. ldv1t .LT. q ) THEN
          info = -16
       ELSE IF( wantv2t .AND. ldv2t .LT. m-q ) THEN
          info = -18
       END IF
 *
 *     Quick return if Q = 0
 *
       IF( info .EQ. 0 .AND. q .EQ. 0 ) THEN
          lworkmin = 1
          work(1) = lworkmin
          RETURN
       END IF
 *
 *     Compute workspace
 *
       IF( info .EQ. 0 ) THEN
          iu1cs = 1
          iu1sn = iu1cs + q
          iu2cs = iu1sn + q
          iu2sn = iu2cs + q
          iv1tcs = iu2sn + q
          iv1tsn = iv1tcs + q
          iv2tcs = iv1tsn + q
          iv2tsn = iv2tcs + q
          lworkopt = iv2tsn + q - 1
          lworkmin = lworkopt
          work(1) = lworkopt
          IF( lwork .LT. lworkmin .AND. .NOT. lquery ) THEN
             info = -28
          END IF
       END IF
 *
       IF( info .NE. 0 ) THEN
          CALL xerbla( 'DBBCSD', -info )
          RETURN
       ELSE IF( lquery ) THEN
          RETURN
       END IF
 *
 *     Get machine constants
 *
       eps = dlamch( 'Epsilon' )
       unfl = dlamch( 'Safe minimum' )
       tolmul = max( ten, min( hundred, eps**meighth ) )
       tol = tolmul*eps
       thresh = max( tol, maxitr*q*q*unfl )
 *
 *     Test for negligible sines or cosines
 *
       DO i = 1, q
          IF( theta(i) .LT. thresh ) THEN
             theta(i) = zero
          ELSE IF( theta(i) .GT. piover2-thresh ) THEN
             theta(i) = piover2
          END IF
       END DO
       DO i = 1, q-1
          IF( phi(i) .LT. thresh ) THEN
             phi(i) = zero
          ELSE IF( phi(i) .GT. piover2-thresh ) THEN
             phi(i) = piover2
          END IF
       END DO
 *
 *     Initial deflation
 *
       imax = q
       DO WHILE( imax .GT. 1 )
          IF( phi(imax-1) .NE. zero ) THEN
             EXIT
          END IF
          imax = imax - 1
       END DO
       imin = imax - 1
       IF  ( imin .GT. 1 ) THEN
          DO WHILE( phi(imin-1) .NE. zero )
             imin = imin - 1
             IF  ( imin .LE. 1 ) EXIT
          END DO
       END IF
 *
 *     Initialize iteration counter
 *
       maxit = maxitr*q*q
       iter = 0
 *
 *     Begin main iteration loop
 *
       DO WHILE( imax .GT. 1 )
 *
 *        Compute the matrix entries
 *
 *        Rebuild the diagonal (D) and off-diagonal (E) entries of
 *        B11, B12, B21 and B22 for the active window IMIN..IMAX from
 *        the current angles THETA and PHI.
 *
          b11d(imin) = cos( theta(imin) )
          b21d(imin) = -sin( theta(imin) )
          DO i = imin, imax - 1
             b11e(i) = -sin( theta(i) ) * sin( phi(i) )
             b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) )
             b12d(i) = sin( theta(i) ) * cos( phi(i) )
             b12e(i) = cos( theta(i+1) ) * sin( phi(i) )
             b21e(i) = -cos( theta(i) ) * sin( phi(i) )
             b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) )
             b22d(i) = cos( theta(i) ) * cos( phi(i) )
             b22e(i) = -sin( theta(i+1) ) * sin( phi(i) )
          END DO
 *        The last diagonal entries of B12 and B22 involve no PHI
          b12d(imax) = sin( theta(imax) )
          b22d(imax) = cos( theta(imax) )
 *
 *        Abort if not converging; otherwise, increment ITER
 *
          IF( iter .GT. maxit ) THEN
             info = 0
             DO i = 1, q
                IF( phi(i) .NE. zero )
      $            info = info + 1
             END DO
             RETURN
          END IF
 *
          iter = iter + imax - imin
 *
 *        Compute shifts
 *
          thetamax = theta(imin)
          thetamin = theta(imin)
          DO i = imin+1, imax
             IF( theta(i) > thetamax )
      $         thetamax = theta(i)
             IF( theta(i) < thetamin )
      $         thetamin = theta(i)
          END DO
 *
          IF( thetamax .GT. piover2 - thresh ) THEN
 *
 *           Zero on diagonals of B11 and B22; induce deflation with a
 *           zero shift
 *
             mu = zero
             nu = one
 *
          ELSE IF( thetamin .LT. thresh ) THEN
 *
 *           Zero on diagonals of B12 and B22; induce deflation with a
 *           zero shift
 *
             mu = one
             nu = zero
 *
          ELSE
 *
 *           Compute shifts for B11 and B21 and use the lesser
 *
             CALL dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,
      $                  dummy )
             CALL dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,
      $                  dummy )
 *
             IF( sigma11 .LE. sigma21 ) THEN
                mu = sigma11
                nu = sqrt( one - mu**2 )
                IF( mu .LT. thresh ) THEN
                   mu = zero
                   nu = one
                END IF
             ELSE
                nu = sigma21
                mu = sqrt( 1.0 - nu**2 )
                IF( nu .LT. thresh ) THEN
                   mu = one
                   nu = zero
                END IF
             END IF
          END IF
 *
 *        Rotate to produce bulges in B11 and B21
 *
 *        The first V1T rotation is generated from whichever block
 *        carries the smaller shift and is stored in WORK.
 *
          IF( mu .LE. nu ) THEN
             CALL dlartgs( b11d(imin), b11e(imin), mu,
      $                    work(iv1tcs+imin-1), work(iv1tsn+imin-1) )
          ELSE
             CALL dlartgs( b21d(imin), b21e(imin), nu,
      $                    work(iv1tcs+imin-1), work(iv1tsn+imin-1) )
          END IF
 *
 *        Apply that rotation to B11 and B21; the entries that fall
 *        outside the bidiagonal band are kept in B11BULGE, B21BULGE.
 *
          temp = work(iv1tcs+imin-1)*b11d(imin) +
      $          work(iv1tsn+imin-1)*b11e(imin)
          b11e(imin) = work(iv1tcs+imin-1)*b11e(imin) -
      $                work(iv1tsn+imin-1)*b11d(imin)
          b11d(imin) = temp
          b11bulge = work(iv1tsn+imin-1)*b11d(imin+1)
          b11d(imin+1) = work(iv1tcs+imin-1)*b11d(imin+1)
          temp = work(iv1tcs+imin-1)*b21d(imin) +
      $          work(iv1tsn+imin-1)*b21e(imin)
          b21e(imin) = work(iv1tcs+imin-1)*b21e(imin) -
      $                work(iv1tsn+imin-1)*b21d(imin)
          b21d(imin) = temp
          b21bulge = work(iv1tsn+imin-1)*b21d(imin+1)
          b21d(imin+1) = work(iv1tcs+imin-1)*b21d(imin+1)
 *
 *        Compute THETA(IMIN)
 *
          theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),
      $                   sqrt( b11d(imin)**2+b11bulge**2 ) )
 *
 *        Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
 *
 *        When the column holding the bulge is not negligible, DLARTGP
 *        builds the U1 (resp. U2) rotation from it; otherwise the
 *        rotation is regenerated from the shift with DLARTGS.
 *
          IF( b11d(imin)**2+b11bulge**2 .GT. thresh**2 ) THEN
             CALL dlartgp( b11bulge, b11d(imin), work(iu1sn+imin-1),
      $                    work(iu1cs+imin-1), r )
          ELSE IF( mu .LE. nu ) THEN
             CALL dlartgs( b11e( imin ), b11d( imin + 1 ), mu,
      $                    work(iu1cs+imin-1), work(iu1sn+imin-1) )
          ELSE
             CALL dlartgs( b12d( imin ), b12e( imin ), nu,
      $                    work(iu1cs+imin-1), work(iu1sn+imin-1) )
          END IF
          IF( b21d(imin)**2+b21bulge**2 .GT. thresh**2 ) THEN
             CALL dlartgp( b21bulge, b21d(imin), work(iu2sn+imin-1),
      $                    work(iu2cs+imin-1), r )
          ELSE IF( nu .LT. mu ) THEN
             CALL dlartgs( b21e( imin ), b21d( imin + 1 ), nu,
      $                    work(iu2cs+imin-1), work(iu2sn+imin-1) )
          ELSE
             CALL dlartgs( b22d(imin), b22e(imin), mu,
      $                    work(iu2cs+imin-1), work(iu2sn+imin-1) )
          END IF
 *        The U2 rotation is stored with both components negated
          work(iu2cs+imin-1) = -work(iu2cs+imin-1)
          work(iu2sn+imin-1) = -work(iu2sn+imin-1)
 *
 *        Apply the U1 rotation to B11 and B12 and the U2 rotation to
 *        B21 and B22.  The B11/B21 bulges are only formed when the
 *        window has more than two rows.
 *
          temp = work(iu1cs+imin-1)*b11e(imin) +
      $          work(iu1sn+imin-1)*b11d(imin+1)
          b11d(imin+1) = work(iu1cs+imin-1)*b11d(imin+1) -
      $                  work(iu1sn+imin-1)*b11e(imin)
          b11e(imin) = temp
          IF( imax .GT. imin+1 ) THEN
             b11bulge = work(iu1sn+imin-1)*b11e(imin+1)
             b11e(imin+1) = work(iu1cs+imin-1)*b11e(imin+1)
          END IF
          temp = work(iu1cs+imin-1)*b12d(imin) +
      $          work(iu1sn+imin-1)*b12e(imin)
          b12e(imin) = work(iu1cs+imin-1)*b12e(imin) -
      $                work(iu1sn+imin-1)*b12d(imin)
          b12d(imin) = temp
          b12bulge = work(iu1sn+imin-1)*b12d(imin+1)
          b12d(imin+1) = work(iu1cs+imin-1)*b12d(imin+1)
          temp = work(iu2cs+imin-1)*b21e(imin) +
      $          work(iu2sn+imin-1)*b21d(imin+1)
          b21d(imin+1) = work(iu2cs+imin-1)*b21d(imin+1) -
      $                  work(iu2sn+imin-1)*b21e(imin)
          b21e(imin) = temp
          IF( imax .GT. imin+1 ) THEN
             b21bulge = work(iu2sn+imin-1)*b21e(imin+1)
             b21e(imin+1) = work(iu2cs+imin-1)*b21e(imin+1)
          END IF
          temp = work(iu2cs+imin-1)*b22d(imin) +
      $          work(iu2sn+imin-1)*b22e(imin)
          b22e(imin) = work(iu2cs+imin-1)*b22e(imin) -
      $                work(iu2sn+imin-1)*b22d(imin)
          b22d(imin) = temp
          b22bulge = work(iu2sn+imin-1)*b22d(imin+1)
          b22d(imin+1) = work(iu2cs+imin-1)*b22d(imin+1)
 *
 *        Inner loop: chase bulges from B11(IMIN,IMIN+2),
 *        B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
 *        bottom-right
 *
          DO i = imin+1, imax-1
 *
 *           Compute PHI(I-1)
 *
             x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1)
             x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge
             y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1)
             y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge
 *
             phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) )
 *
 *           Determine if there are bulges to chase or if a new direct
 *           summand has been reached
 *
             restart11 = b11e(i-1)**2 + b11bulge**2 .LE. thresh**2
             restart21 = b21e(i-1)**2 + b21bulge**2 .LE. thresh**2
             restart12 = b12d(i-1)**2 + b12bulge**2 .LE. thresh**2
             restart22 = b22d(i-1)**2 + b22bulge**2 .LE. thresh**2
 *
 *           If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
 *           B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
 *           chasing by applying the original shift again.
 *
             IF( .NOT. restart11 .AND. .NOT. restart21 ) THEN
                CALL dlartgp( x2, x1, work(iv1tsn+i-1), work(iv1tcs+i-1),
      $                       r )
             ELSE IF( .NOT. restart11 .AND. restart21 ) THEN
                CALL dlartgp( b11bulge, b11e(i-1), work(iv1tsn+i-1),
      $                       work(iv1tcs+i-1), r )
             ELSE IF( restart11 .AND. .NOT. restart21 ) THEN
                CALL dlartgp( b21bulge, b21e(i-1), work(iv1tsn+i-1),
      $                       work(iv1tcs+i-1), r )
             ELSE IF( mu .LE. nu ) THEN
                CALL dlartgs( b11d(i), b11e(i), mu, work(iv1tcs+i-1),
      $                       work(iv1tsn+i-1) )
             ELSE
                CALL dlartgs( b21d(i), b21e(i), nu, work(iv1tcs+i-1),
      $                       work(iv1tsn+i-1) )
             END IF
             work(iv1tcs+i-1) = -work(iv1tcs+i-1)
             work(iv1tsn+i-1) = -work(iv1tsn+i-1)
             IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
                CALL dlartgp( y2, y1, work(iv2tsn+i-1-1),
      $                       work(iv2tcs+i-1-1), r )
             ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
                CALL dlartgp( b12bulge, b12d(i-1), work(iv2tsn+i-1-1),
      $                       work(iv2tcs+i-1-1), r )
             ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
                CALL dlartgp( b22bulge, b22d(i-1), work(iv2tsn+i-1-1),
      $                       work(iv2tcs+i-1-1), r )
             ELSE IF( nu .LT. mu ) THEN
                CALL dlartgs( b12e(i-1), b12d(i), nu, work(iv2tcs+i-1-1),
      $                       work(iv2tsn+i-1-1) )
             ELSE
                CALL dlartgs( b22e(i-1), b22d(i), mu, work(iv2tcs+i-1-1),
      $                       work(iv2tsn+i-1-1) )
             END IF
 *
             temp = work(iv1tcs+i-1)*b11d(i) + work(iv1tsn+i-1)*b11e(i)
             b11e(i) = work(iv1tcs+i-1)*b11e(i) -
      $                work(iv1tsn+i-1)*b11d(i)
             b11d(i) = temp
             b11bulge = work(iv1tsn+i-1)*b11d(i+1)
             b11d(i+1) = work(iv1tcs+i-1)*b11d(i+1)
             temp = work(iv1tcs+i-1)*b21d(i) + work(iv1tsn+i-1)*b21e(i)
             b21e(i) = work(iv1tcs+i-1)*b21e(i) -
      $                work(iv1tsn+i-1)*b21d(i)
             b21d(i) = temp
             b21bulge = work(iv1tsn+i-1)*b21d(i+1)
             b21d(i+1) = work(iv1tcs+i-1)*b21d(i+1)
             temp = work(iv2tcs+i-1-1)*b12e(i-1) +
      $             work(iv2tsn+i-1-1)*b12d(i)
             b12d(i) = work(iv2tcs+i-1-1)*b12d(i) -
      $                work(iv2tsn+i-1-1)*b12e(i-1)
             b12e(i-1) = temp
             b12bulge = work(iv2tsn+i-1-1)*b12e(i)
             b12e(i) = work(iv2tcs+i-1-1)*b12e(i)
             temp = work(iv2tcs+i-1-1)*b22e(i-1) +
      $             work(iv2tsn+i-1-1)*b22d(i)
             b22d(i) = work(iv2tcs+i-1-1)*b22d(i) -
      $                work(iv2tsn+i-1-1)*b22e(i-1)
             b22e(i-1) = temp
             b22bulge = work(iv2tsn+i-1-1)*b22e(i)
             b22e(i) = work(iv2tcs+i-1-1)*b22e(i)
 *
 *           Compute THETA(I)
 *
             x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1)
             x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge
             y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1)
             y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge
 *
             theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) )
 *
 *           Determine if there are bulges to chase or if a new direct
 *           summand has been reached
 *
             restart11 =   b11d(i)**2 + b11bulge**2 .LE. thresh**2
             restart12 = b12e(i-1)**2 + b12bulge**2 .LE. thresh**2
             restart21 =   b21d(i)**2 + b21bulge**2 .LE. thresh**2
             restart22 = b22e(i-1)**2 + b22bulge**2 .LE. thresh**2
 *
 *           If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
 *           B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
 *           chasing by applying the original shift again.
 *
             IF( .NOT. restart11 .AND. .NOT. restart12 ) THEN
                CALL dlartgp( x2, x1, work(iu1sn+i-1), work(iu1cs+i-1),
      $                       r )
             ELSE IF( .NOT. restart11 .AND. restart12 ) THEN
                CALL dlartgp( b11bulge, b11d(i), work(iu1sn+i-1),
      $                       work(iu1cs+i-1), r )
             ELSE IF( restart11 .AND. .NOT. restart12 ) THEN
                CALL dlartgp( b12bulge, b12e(i-1), work(iu1sn+i-1),
      $                       work(iu1cs+i-1), r )
             ELSE IF( mu .LE. nu ) THEN
                CALL dlartgs( b11e(i), b11d(i+1), mu, work(iu1cs+i-1),
      $                       work(iu1sn+i-1) )
             ELSE
                CALL dlartgs( b12d(i), b12e(i), nu, work(iu1cs+i-1),
      $                       work(iu1sn+i-1) )
             END IF
             IF( .NOT. restart21 .AND. .NOT. restart22 ) THEN
                CALL dlartgp( y2, y1, work(iu2sn+i-1), work(iu2cs+i-1),
      $                       r )
             ELSE IF( .NOT. restart21 .AND. restart22 ) THEN
                CALL dlartgp( b21bulge, b21d(i), work(iu2sn+i-1),
      $                       work(iu2cs+i-1), r )
             ELSE IF( restart21 .AND. .NOT. restart22 ) THEN
                CALL dlartgp( b22bulge, b22e(i-1), work(iu2sn+i-1),
      $                       work(iu2cs+i-1), r )
             ELSE IF( nu .LT. mu ) THEN
                CALL dlartgs( b21e(i), b21e(i+1), nu, work(iu2cs+i-1),
      $                       work(iu2sn+i-1) )
             ELSE
                CALL dlartgs( b22d(i), b22e(i), mu, work(iu2cs+i-1),
      $                       work(iu2sn+i-1) )
             END IF
             work(iu2cs+i-1) = -work(iu2cs+i-1)
             work(iu2sn+i-1) = -work(iu2sn+i-1)
 *
             temp = work(iu1cs+i-1)*b11e(i) + work(iu1sn+i-1)*b11d(i+1)
             b11d(i+1) = work(iu1cs+i-1)*b11d(i+1) -
      $                  work(iu1sn+i-1)*b11e(i)
             b11e(i) = temp
             IF( i .LT. imax - 1 ) THEN
                b11bulge = work(iu1sn+i-1)*b11e(i+1)
                b11e(i+1) = work(iu1cs+i-1)*b11e(i+1)
             END IF
             temp = work(iu2cs+i-1)*b21e(i) + work(iu2sn+i-1)*b21d(i+1)
             b21d(i+1) = work(iu2cs+i-1)*b21d(i+1) -
      $                  work(iu2sn+i-1)*b21e(i)
             b21e(i) = temp
             IF( i .LT. imax - 1 ) THEN
                b21bulge = work(iu2sn+i-1)*b21e(i+1)
                b21e(i+1) = work(iu2cs+i-1)*b21e(i+1)
             END IF
             temp = work(iu1cs+i-1)*b12d(i) + work(iu1sn+i-1)*b12e(i)
             b12e(i) = work(iu1cs+i-1)*b12e(i) - work(iu1sn+i-1)*b12d(i)
             b12d(i) = temp
             b12bulge = work(iu1sn+i-1)*b12d(i+1)
             b12d(i+1) = work(iu1cs+i-1)*b12d(i+1)
             temp = work(iu2cs+i-1)*b22d(i) + work(iu2sn+i-1)*b22e(i)
             b22e(i) = work(iu2cs+i-1)*b22e(i) - work(iu2sn+i-1)*b22d(i)
             b22d(i) = temp
             b22bulge = work(iu2sn+i-1)*b22d(i+1)
             b22d(i+1) = work(iu2cs+i-1)*b22d(i+1)
 *
          END DO
 *
 *        Compute PHI(IMAX-1)
 *
 *        At the bottom of the window only B12 and B22 still carry a
 *        bulge, so X2 is not needed here.
 *
          x1 = sin(theta(imax-1))*b11e(imax-1) +
      $        cos(theta(imax-1))*b21e(imax-1)
          y1 = sin(theta(imax-1))*b12d(imax-1) +
      $        cos(theta(imax-1))*b22d(imax-1)
          y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge
 *
          phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) )
 *
 *        Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
 *
          restart12 = b12d(imax-1)**2 + b12bulge**2 .LE. thresh**2
          restart22 = b22d(imax-1)**2 + b22bulge**2 .LE. thresh**2
 *
 *        Build the last V2T rotation from the non-negligible bulge
 *        data, or regenerate it from the shift if both are negligible
 *
          IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
             CALL dlartgp( y2, y1, work(iv2tsn+imax-1-1),
      $                    work(iv2tcs+imax-1-1), r )
          ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
             CALL dlartgp( b12bulge, b12d(imax-1), work(iv2tsn+imax-1-1),
      $                    work(iv2tcs+imax-1-1), r )
          ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
             CALL dlartgp( b22bulge, b22d(imax-1), work(iv2tsn+imax-1-1),
      $                    work(iv2tcs+imax-1-1), r )
          ELSE IF( nu .LT. mu ) THEN
             CALL dlartgs( b12e(imax-1), b12d(imax), nu,
      $                    work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )
          ELSE
             CALL dlartgs( b22e(imax-1), b22d(imax), mu,
      $                    work(iv2tcs+imax-1-1), work(iv2tsn+imax-1-1) )
          END IF
 *
 *        Apply that rotation to the trailing entries of B12 and B22
 *
          temp = work(iv2tcs+imax-1-1)*b12e(imax-1) +
      $          work(iv2tsn+imax-1-1)*b12d(imax)
          b12d(imax) = work(iv2tcs+imax-1-1)*b12d(imax) -
      $                work(iv2tsn+imax-1-1)*b12e(imax-1)
          b12e(imax-1) = temp
          temp = work(iv2tcs+imax-1-1)*b22e(imax-1) +
      $          work(iv2tsn+imax-1-1)*b22d(imax)
          b22d(imax) = work(iv2tcs+imax-1-1)*b22d(imax) -
      $                work(iv2tsn+imax-1-1)*b22e(imax-1)
          b22e(imax-1) = temp
 *
 *        Update singular vectors
 *
          IF( wantu1 ) THEN
             IF( colmajor ) THEN
                CALL dlasr( 'R', 'V', 'F', p, imax-imin+1,
      $                     work(iu1cs+imin-1), work(iu1sn+imin-1),
      $                     u1(1,imin), ldu1 )
             ELSE
                CALL dlasr( 'L', 'V', 'F', imax-imin+1, p,
      $                     work(iu1cs+imin-1), work(iu1sn+imin-1),
      $                     u1(imin,1), ldu1 )
             END IF
          END IF
          IF( wantu2 ) THEN
             IF( colmajor ) THEN
                CALL dlasr( 'R', 'V', 'F', m-p, imax-imin+1,
      $                     work(iu2cs+imin-1), work(iu2sn+imin-1),
      $                     u2(1,imin), ldu2 )
             ELSE
                CALL dlasr( 'L', 'V', 'F', imax-imin+1, m-p,
      $                     work(iu2cs+imin-1), work(iu2sn+imin-1),
      $                     u2(imin,1), ldu2 )
             END IF
          END IF
          IF( wantv1t ) THEN
             IF( colmajor ) THEN
                CALL dlasr( 'L', 'V', 'F', imax-imin+1, q,
      $                     work(iv1tcs+imin-1), work(iv1tsn+imin-1),
      $                     v1t(imin,1), ldv1t )
             ELSE
                CALL dlasr( 'R', 'V', 'F', q, imax-imin+1,
      $                     work(iv1tcs+imin-1), work(iv1tsn+imin-1),
      $                     v1t(1,imin), ldv1t )
             END IF
          END IF
          IF( wantv2t ) THEN
             IF( colmajor ) THEN
                CALL dlasr( 'L', 'V', 'F', imax-imin+1, m-q,
      $                     work(iv2tcs+imin-1), work(iv2tsn+imin-1),
      $                     v2t(imin,1), ldv2t )
             ELSE
                CALL dlasr( 'R', 'V', 'F', m-q, imax-imin+1,
      $                     work(iv2tcs+imin-1), work(iv2tsn+imin-1),
      $                     v2t(1,imin), ldv2t )
             END IF
          END IF
 *
 *        Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
 *
 *        When the test fires, the last diagonal entries of B11 and
 *        B21 are negated and the matching row (column-major) or
 *        column (row-major) of V1T is scaled by -1.
 *
          IF( b11e(imax-1)+b21e(imax-1) .GT. 0 ) THEN
             b11d(imax) = -b11d(imax)
             b21d(imax) = -b21d(imax)
             IF( wantv1t ) THEN
                IF( colmajor ) THEN
                   CALL dscal( q, negone, v1t(imax,1), ldv1t )
                ELSE
                   CALL dscal( q, negone, v1t(1,imax), 1 )
                END IF
             END IF
          END IF
 *
 *        Compute THETA(IMAX)
 *
 *        This uses the sign-corrected B11D(IMAX) and B21D(IMAX), so
 *        it must follow the block above.
 *
          x1 = cos(phi(imax-1))*b11d(imax) +
      $        sin(phi(imax-1))*b12e(imax-1)
          y1 = cos(phi(imax-1))*b21d(imax) +
      $        sin(phi(imax-1))*b22e(imax-1)
 *
          theta(imax) = atan2( abs(y1), abs(x1) )
 *
 *        Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
 *        and B22(IMAX,IMAX-1)
 *
 *        Only B12D(IMAX) / B22D(IMAX) are negated here, together with
 *        the matching column (column-major) or row (row-major) of U1
 *        and U2.
 *
          IF( b11d(imax)+b12e(imax-1) .LT. 0 ) THEN
             b12d(imax) = -b12d(imax)
             IF( wantu1 ) THEN
                IF( colmajor ) THEN
                   CALL dscal( p, negone, u1(1,imax), 1 )
                ELSE
                   CALL dscal( p, negone, u1(imax,1), ldu1 )
                END IF
             END IF
          END IF
          IF( b21d(imax)+b22e(imax-1) .GT. 0 ) THEN
             b22d(imax) = -b22d(imax)
             IF( wantu2 ) THEN
                IF( colmajor ) THEN
                   CALL dscal( m-p, negone, u2(1,imax), 1 )
                ELSE
                   CALL dscal( m-p, negone, u2(imax,1), ldu2 )
                END IF
             END IF
          END IF
 *
 *        Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
 *
 *        Only V2T is scaled here; B12D and B22D are left unchanged.
 *
          IF( b12d(imax)+b22d(imax) .LT. 0 ) THEN
             IF( wantv2t ) THEN
                IF( colmajor ) THEN
                   CALL dscal( m-q, negone, v2t(imax,1), ldv2t )
                ELSE
                   CALL dscal( m-q, negone, v2t(1,imax), 1 )
                END IF
             END IF
          END IF
 *
 *        Test for negligible sines or cosines
 *
          DO i = imin, imax
             IF( theta(i) .LT. thresh ) THEN
                theta(i) = zero
             ELSE IF( theta(i) .GT. piover2-thresh ) THEN
                theta(i) = piover2
             END IF
          END DO
          DO i = imin, imax-1
             IF( phi(i) .LT. thresh ) THEN
                phi(i) = zero
             ELSE IF( phi(i) .GT. piover2-thresh ) THEN
                phi(i) = piover2
             END IF
          END DO
 *
 *        Deflate
 *
          IF (imax .GT. 1) THEN
             DO WHILE( phi(imax-1) .EQ. zero )
                imax = imax - 1
                IF (imax .LE. 1) EXIT
             END DO
          END IF
          IF( imin .GT. imax - 1 )
      $      imin = imax - 1
          IF (imin .GT. 1) THEN
             DO WHILE (phi(imin-1) .NE. zero)
                 imin = imin - 1
                 IF (imin .LE. 1) EXIT
             END DO
          END IF
 *
 *        Repeat main iteration loop
 *
       END DO
 *
 *     Postprocessing: order THETA from least to greatest
 *
       DO i = 1, q
 *
          mini = i
          thetamin = theta(i)
          DO j = i+1, q
             IF( theta(j) .LT. thetamin ) THEN
                mini = j
                thetamin = theta(j)
             END IF
          END DO
 *
          IF( mini .NE. i ) THEN
             theta(mini) = theta(i)
             theta(i) = thetamin
             IF( colmajor ) THEN
                IF( wantu1 )
      $            CALL dswap( p, u1(1,i), 1, u1(1,mini), 1 )
                IF( wantu2 )
      $            CALL dswap( m-p, u2(1,i), 1, u2(1,mini), 1 )
                IF( wantv1t )
      $            CALL dswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t )
                IF( wantv2t )
      $            CALL dswap( m-q, v2t(i,1), ldv2t, v2t(mini,1),
      $               ldv2t )
             ELSE
                IF( wantu1 )
      $            CALL dswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 )
                IF( wantu2 )
      $            CALL dswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 )
                IF( wantv1t )
      $            CALL dswap( q, v1t(1,i), 1, v1t(1,mini), 1 )
                IF( wantv2t )
      $            CALL dswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 )
             END IF
          END IF
 *
       END DO
 *
       RETURN
 *
 *     End of DBBCSD
 *

Here is the call graph for this function:

Here is the caller graph for this function: