◆ zbbcsd()

subroutine zbbcsd	(	character	jobu1,
		character	jobu2,
		character	jobv1t,
		character	jobv2t,
		character	trans,
		integer	m,
		integer	p,
		integer	q,
		double precision, dimension( * )	theta,
		double precision, dimension( * )	phi,
		complex16, dimension( ldu1, * )	u1,
		integer	ldu1,
		complex16, dimension( ldu2, * )	u2,
		integer	ldu2,
		complex16, dimension( ldv1t, * )	v1t,
		integer	ldv1t,
		complex16, dimension( ldv2t, * )	v2t,
		integer	ldv2t,
		double precision, dimension( * )	b11d,
		double precision, dimension( * )	b11e,
		double precision, dimension( * )	b12d,
		double precision, dimension( * )	b12e,
		double precision, dimension( * )	b21d,
		double precision, dimension( * )	b21e,
		double precision, dimension( * )	b22d,
		double precision, dimension( * )	b22e,
		double precision, dimension( * )	rwork,
		integer	lrwork,
		integer	info
	)

ZBBCSD

Download ZBBCSD + dependencies [TGZ] [ZIP] [TXT]

Purpose:

 ZBBCSD computes the CS decomposition of a unitary matrix in
 bidiagonal-block form,


     [ B11 | B12 0  0 ]
     [  0  |  0 -I  0 ]
 X = [----------------]
     [ B21 | B22 0  0 ]
     [  0  |  0  0  I ]

                               [  C | -S  0  0 ]
                   [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**H
                 = [---------] [---------------] [---------]   .
                   [    | U2 ] [  S |  C  0  0 ] [    | V2 ]
                               [  0 |  0  0  I ]

 X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
 than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
 transposed and/or permuted. This can be done in constant time using
 the TRANS and SIGNS options. See ZUNCSD for details.)

 The bidiagonal matrices B11, B12, B21, and B22 are represented
 implicitly by angles THETA(1:Q) and PHI(1:Q-1).

 The unitary matrices U1, U2, V1T, and V2T are input/output.
 The input matrices are pre- or post-multiplied by the appropriate
 singular vector matrices.

Parameters

[in]	JOBU1	JOBU1 is CHARACTER = 'Y': U1 is updated; otherwise: U1 is not updated.
[in]	JOBU2	JOBU2 is CHARACTER = 'Y': U2 is updated; otherwise: U2 is not updated.
[in]	JOBV1T	JOBV1T is CHARACTER = 'Y': V1T is updated; otherwise: V1T is not updated.
[in]	JOBV2T	JOBV2T is CHARACTER = 'Y': V2T is updated; otherwise: V2T is not updated.
[in]	TRANS	TRANS is CHARACTER = 'T': X, U1, U2, V1T, and V2T are stored in row-major order; otherwise: X, U1, U2, V1T, and V2T are stored in column-major order.
[in]	M	M is INTEGER The number of rows and columns in X, the unitary matrix in bidiagonal-block form.
[in]	P	P is INTEGER The number of rows in the top-left block of X. 0 <= P <= M.
[in]	Q	Q is INTEGER The number of columns in the top-left block of X. 0 <= Q <= MIN(P,M-P,M-Q).
[in,out]	THETA	THETA is DOUBLE PRECISION array, dimension (Q) On entry, the angles THETA(1),...,THETA(Q) that, along with PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block form. On exit, the angles whose cosines and sines define the diagonal blocks in the CS decomposition.
[in,out]	PHI	PHI is DOUBLE PRECISION array, dimension (Q-1) The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),..., THETA(Q), define the matrix in bidiagonal-block form.
[in,out]	U1	U1 is COMPLEX*16 array, dimension (LDU1,P) On entry, a P-by-P matrix. On exit, U1 is postmultiplied by the left singular vector matrix common to [ B11 ; 0 ] and [ B12 0 0 ; 0 -I 0 ].
[in]	LDU1	LDU1 is INTEGER The leading dimension of the array U1, LDU1 >= MAX(1,P).
[in,out]	U2	U2 is COMPLEX*16 array, dimension (LDU2,M-P) On entry, an (M-P)-by-(M-P) matrix. On exit, U2 is postmultiplied by the left singular vector matrix common to [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
[in]	LDU2	LDU2 is INTEGER The leading dimension of the array U2, LDU2 >= MAX(1,M-P).
[in,out]	V1T	V1T is COMPLEX*16 array, dimension (LDV1T,Q) On entry, a Q-by-Q matrix. On exit, V1T is premultiplied by the conjugate transpose of the right singular vector matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
[in]	LDV1T	LDV1T is INTEGER The leading dimension of the array V1T, LDV1T >= MAX(1,Q).
[in,out]	V2T	V2T is COMPLEX*16 array, dimension (LDV2T,M-Q) On entry, an (M-Q)-by-(M-Q) matrix. On exit, V2T is premultiplied by the conjugate transpose of the right singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and [ B22 0 0 ; 0 0 I ].
[in]	LDV2T	LDV2T is INTEGER The leading dimension of the array V2T, LDV2T >= MAX(1,M-Q).
[out]	B11D	B11D is DOUBLE PRECISION array, dimension (Q) When ZBBCSD converges, B11D contains the cosines of THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then B11D contains the diagonal of the partially reduced top-left block.
[out]	B11E	B11E is DOUBLE PRECISION array, dimension (Q-1) When ZBBCSD converges, B11E contains zeros. If ZBBCSD fails to converge, then B11E contains the superdiagonal of the partially reduced top-left block.
[out]	B12D	B12D is DOUBLE PRECISION array, dimension (Q) When ZBBCSD converges, B12D contains the negative sines of THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then B12D contains the diagonal of the partially reduced top-right block.
[out]	B12E	B12E is DOUBLE PRECISION array, dimension (Q-1) When ZBBCSD converges, B12E contains zeros. If ZBBCSD fails to converge, then B12E contains the subdiagonal of the partially reduced top-right block.
[out]	B21D	B21D is DOUBLE PRECISION array, dimension (Q) When ZBBCSD converges, B21D contains the negative sines of THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then B21D contains the diagonal of the partially reduced bottom-left block.
[out]	B21E	B21E is DOUBLE PRECISION array, dimension (Q-1) When ZBBCSD converges, B21E contains zeros. If ZBBCSD fails to converge, then B21E contains the subdiagonal of the partially reduced bottom-left block.
[out]	B22D	B22D is DOUBLE PRECISION array, dimension (Q) When ZBBCSD converges, B22D contains the cosines of THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then B22D contains the diagonal of the partially reduced bottom-right block.
[out]	B22E	B22E is DOUBLE PRECISION array, dimension (Q-1) When ZBBCSD converges, B22E contains zeros. If ZBBCSD fails to converge, then B22E contains the subdiagonal of the partially reduced bottom-right block.
[out]	RWORK	RWORK is DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
[in]	LRWORK	LRWORK is INTEGER The dimension of the array RWORK. LRWORK >= MAX(1,8*Q). If LRWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the RWORK array, returns this value as the first entry of the work array, and no error message related to LRWORK is issued by XERBLA.
[out]	INFO	INFO is INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if ZBBCSD did not converge, INFO specifies the number of nonzero entries in PHI, and B11D, B11E, etc., contain the partially reduced matrix.

Internal Parameters:

  TOLMUL  DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
          TOLMUL controls the convergence criterion of the QR loop.
          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
          are within TOLMUL*EPS of either bound.

References: [1] Brian D. Sutton. Computing the complete CS decomposition. Numer. Algorithms, 50(1):33-65, 2009.

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Definition at line 328 of file zbbcsd.f.

*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     This first section declares the arguments and locals, decodes
*     the job options, validates the dimensions, lays out RWORK,
*     answers workspace queries, sets the convergence threshold and
*     locates the first active index range IMIN:IMAX.
*
*     .. Scalar Arguments ..
      CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
      INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   B11D( * ), B11E( * ), B12D( * ), B12E( * ),
     $                   B21D( * ), B21E( * ), B22D( * ), B22E( * ),
     $                   PHI( * ), THETA( * ), RWORK( * )
      COMPLEX*16         U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
     $                   V2T( LDV2T, * )
*     ..
*
*  ===================================================================
*
*     .. Parameters ..
*     MAXITR scales the iteration budget (MAXIT = MAXITR*Q*Q below)
*     and also enters the underflow guard in THRESH.
      INTEGER            MAXITR
      parameter( maxitr = 6 )
*     HUNDRED, TEN and MEIGHTH are only used to form TOLMUL.
      DOUBLE PRECISION   HUNDRED, MEIGHTH, ONE, TEN, ZERO
      parameter( hundred = 100.0d0, meighth = -0.125d0,
     $                     one = 1.0d0, ten = 10.0d0, zero = 0.0d0 )
*     Scale factor used with ZSCAL to flip the sign of a vector.
      COMPLEX*16         NEGONECOMPLEX
      parameter( negonecomplex = (-1.0d0,0.0d0) )
*     PI/2, the upper bound to which angles are snapped.
      DOUBLE PRECISION   PIOVER2
      parameter( piover2 = 1.57079632679489661923132169163975144210d0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            COLMAJOR, LQUERY, RESTART11, RESTART12,
     $                   RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
     $                   WANTV2T
      INTEGER            I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
     $                   IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
     $                   LRWORKMIN, LRWORKOPT, MAXIT, MINI
      DOUBLE PRECISION   B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY,
     $                   EPS, MU, NU, R, SIGMA11, SIGMA21,
     $                   TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
     $                   UNFL, X1, X2, Y1, Y2
*
      EXTERNAL           dlartgp, dlartgs, dlas2, xerbla, zlasr, zscal,
     $                   zswap
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      LOGICAL            LSAME
      EXTERNAL           lsame, dlamch
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, atan2, cos, max, min, sin, sqrt
*     ..
*     .. Executable Statements ..
*
*     Test input arguments
*
*     LRWORK = -1 requests a workspace query. Each JOB flag is true
*     only for 'Y' (as tested by LSAME); any TRANS other than 'T'
*     selects column-major storage.
*
      info = 0
      lquery = lrwork .EQ. -1
      wantu1 = lsame( jobu1, 'Y' )
      wantu2 = lsame( jobu2, 'Y' )
      wantv1t = lsame( jobv1t, 'Y' )
      wantv2t = lsame( jobv2t, 'Y' )
      colmajor = .NOT. lsame( trans, 'T' )
*
*     The first failing test determines INFO; -INFO is the position
*     of the offending argument. Leading dimensions are only checked
*     for matrices that are actually updated.
*
      IF( m .LT. 0 ) THEN
         info = -6
      ELSE IF( p .LT. 0 .OR. p .GT. m ) THEN
         info = -7
      ELSE IF( q .LT. 0 .OR. q .GT. m ) THEN
         info = -8
      ELSE IF( q .GT. p .OR. q .GT. m-p .OR. q .GT. m-q ) THEN
         info = -8
      ELSE IF( wantu1 .AND. ldu1 .LT. p ) THEN
         info = -12
      ELSE IF( wantu2 .AND. ldu2 .LT. m-p ) THEN
         info = -14
      ELSE IF( wantv1t .AND. ldv1t .LT. q ) THEN
         info = -16
      ELSE IF( wantv2t .AND. ldv2t .LT. m-q ) THEN
         info = -18
      END IF
*
*     Quick return if Q = 0
*
*     Only taken when the arguments are valid; RWORK(1) reports the
*     minimal workspace size of 1.
*
      IF( info .EQ. 0 .AND. q .EQ. 0 ) THEN
         lrworkmin = 1
         rwork(1) = lrworkmin
         RETURN
      END IF
*
*     Compute workspace
*
*     RWORK is split into eight consecutive segments of length Q.
*     They hold the cosines (..CS) and sines (..SN) of the rotations
*     that are later applied to U1, U2, V1T and V2T, so the required
*     size is 8*Q. RWORK(1) is set to that size before LRWORK is
*     tested, so it is also returned for a workspace query.
*
      IF( info .EQ. 0 ) THEN
         iu1cs = 1
         iu1sn = iu1cs + q
         iu2cs = iu1sn + q
         iu2sn = iu2cs + q
         iv1tcs = iu2sn + q
         iv1tsn = iv1tcs + q
         iv2tcs = iv1tsn + q
         iv2tsn = iv2tcs + q
         lrworkopt = iv2tsn + q - 1
         lrworkmin = lrworkopt
         rwork(1) = lrworkopt
         IF( lrwork .LT. lrworkmin .AND. .NOT. lquery ) THEN
            info = -28
         END IF
      END IF
*
*     Report an invalid argument through XERBLA, or return right
*     after a workspace query.
*
      IF( info .NE. 0 ) THEN
         CALL xerbla( 'ZBBCSD', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Get machine constants
*
*     TOLMUL = EPS**(-1/8) clamped to [10,100]. THRESH is the larger
*     of TOLMUL*EPS and MAXITR*Q*Q*UNFL.
*     NOTE(review): MAXITR*Q*Q is evaluated in default INTEGER
*     arithmetic before it is multiplied by UNFL.
*
      eps = dlamch( 'Epsilon' )
      unfl = dlamch( 'Safe minimum' )
      tolmul = max( ten, min( hundred, eps**meighth ) )
      tol = tolmul*eps
      thresh = max( tol, maxitr*q*q*unfl )
*
*     Test for negligible sines or cosines
*
*     Angles below THRESH are set to zero and angles within THRESH
*     of PI/2 are set to PI/2. THETA has Q entries, PHI has Q-1.
*
      DO i = 1, q
         IF( theta(i) .LT. thresh ) THEN
            theta(i) = zero
         ELSE IF( theta(i) .GT. piover2-thresh ) THEN
            theta(i) = piover2
         END IF
      END DO
      DO i = 1, q-1
         IF( phi(i) .LT. thresh ) THEN
            phi(i) = zero
         ELSE IF( phi(i) .GT. piover2-thresh ) THEN
            phi(i) = piover2
         END IF
      END DO
*
*     Initial deflation
*
*     IMAX is lowered past trailing zero entries of PHI, stopping at
*     the first nonzero PHI(IMAX-1) or at 1. IMIN starts at IMAX-1
*     and is lowered while PHI(IMIN-1) is nonzero, but not below 1.
*     The IMIN .GT. 1 guard keeps PHI(0) from being referenced.
*
      imax = q
      DO WHILE( imax .GT. 1 )
         IF( phi(imax-1) .NE. zero ) THEN
            EXIT
         END IF
         imax = imax - 1
      END DO
      imin = imax - 1
      IF  ( imin .GT. 1 ) THEN
         DO WHILE( phi(imin-1) .NE. zero )
            imin = imin - 1
            IF  ( imin .LE. 1 ) EXIT
         END DO
      END IF
*
*     Initialize iteration counter
*
*     ITER is advanced by IMAX-IMIN on every sweep of the main loop
*     and compared against the budget MAXIT.
*
      maxit = maxitr*q*q
      iter = 0
*
*     Begin main iteration loop
*
*     Each sweep works on the active index range IMIN:IMAX. It
*     rebuilds the entries of B11, B12, B21 and B22 from THETA and
*     PHI, chooses a shift pair (MU,NU), chases the resulting bulges
*     while storing the rotation cosines and sines in RWORK, writes
*     the new angles back to THETA and PHI, applies the stored
*     rotations to the requested U1, U2, V1T and V2T, and deflates.
*     The loop ends when IMAX has been reduced to 1.
*
      DO WHILE( imax .GT. 1 )
*
*        Compute the matrix entries
*
         b11d(imin) = cos( theta(imin) )
         b21d(imin) = -sin( theta(imin) )
         DO i = imin, imax - 1
            b11e(i) = -sin( theta(i) ) * sin( phi(i) )
            b11d(i+1) = cos( theta(i+1) ) * cos( phi(i) )
            b12d(i) = sin( theta(i) ) * cos( phi(i) )
            b12e(i) = cos( theta(i+1) ) * sin( phi(i) )
            b21e(i) = -cos( theta(i) ) * sin( phi(i) )
            b21d(i+1) = -sin( theta(i+1) ) * cos( phi(i) )
            b22d(i) = cos( theta(i) ) * cos( phi(i) )
            b22e(i) = -sin( theta(i+1) ) * sin( phi(i) )
         END DO
         b12d(imax) = sin( theta(imax) )
         b22d(imax) = cos( theta(imax) )
*
*        Abort if not converging; otherwise, increment ITER
*
*        INFO returns the number of nonzero entries of PHI. PHI has
*        only Q-1 entries, so the count must stop at Q-1; going up
*        to Q would read past the end of the array.
*
         IF( iter .GT. maxit ) THEN
            info = 0
            DO i = 1, q-1
               IF( phi(i) .NE. zero )
     $            info = info + 1
            END DO
            RETURN
         END IF
*
         iter = iter + imax - imin
*
*        Compute shifts
*
*        The extreme angles of the active range decide whether a
*        zero shift is used.
*
         thetamax = theta(imin)
         thetamin = theta(imin)
         DO i = imin+1, imax
            IF( theta(i) .GT. thetamax )
     $         thetamax = theta(i)
            IF( theta(i) .LT. thetamin )
     $         thetamin = theta(i)
         END DO
*
         IF( thetamax .GT. piover2 - thresh ) THEN
*
*           Zero on diagonals of B11 and B22; induce deflation with a
*           zero shift
*
            mu = zero
            nu = one
*
         ELSE IF( thetamin .LT. thresh ) THEN
*
*           Zero on diagonals of B12 and B21; induce deflation with a
*           zero shift
*
            mu = one
            nu = zero
*
         ELSE
*
*           Compute shifts for B11 and B21 and use the lesser
*
*           The other shift follows from MU**2 + NU**2 = 1. A shift
*           below THRESH is replaced by an exact zero shift.
*
            CALL dlas2( b11d(imax-1), b11e(imax-1), b11d(imax), sigma11,
     $                  dummy )
            CALL dlas2( b21d(imax-1), b21e(imax-1), b21d(imax), sigma21,
     $                  dummy )
*
            IF( sigma11 .LE. sigma21 ) THEN
               mu = sigma11
               nu = sqrt( one - mu**2 )
               IF( mu .LT. thresh ) THEN
                  mu = zero
                  nu = one
               END IF
            ELSE
               nu = sigma21
               mu = sqrt( one - nu**2 )
               IF( nu .LT. thresh ) THEN
                  mu = one
                  nu = zero
               END IF
            END IF
         END IF
*
*        Rotate to produce bulges in B11 and B21
*
*        The rotation is generated from the block that carries the
*        smaller shift and is stored in the V1T slots of RWORK.
*
         IF( mu .LE. nu ) THEN
            CALL dlartgs( b11d(imin), b11e(imin), mu,
     $                    rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1) )
         ELSE
            CALL dlartgs( b21d(imin), b21e(imin), nu,
     $                    rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1) )
         END IF
*
         temp = rwork(iv1tcs+imin-1)*b11d(imin) +
     $          rwork(iv1tsn+imin-1)*b11e(imin)
         b11e(imin) = rwork(iv1tcs+imin-1)*b11e(imin) -
     $                rwork(iv1tsn+imin-1)*b11d(imin)
         b11d(imin) = temp
         b11bulge = rwork(iv1tsn+imin-1)*b11d(imin+1)
         b11d(imin+1) = rwork(iv1tcs+imin-1)*b11d(imin+1)
         temp = rwork(iv1tcs+imin-1)*b21d(imin) +
     $          rwork(iv1tsn+imin-1)*b21e(imin)
         b21e(imin) = rwork(iv1tcs+imin-1)*b21e(imin) -
     $                rwork(iv1tsn+imin-1)*b21d(imin)
         b21d(imin) = temp
         b21bulge = rwork(iv1tsn+imin-1)*b21d(imin+1)
         b21d(imin+1) = rwork(iv1tcs+imin-1)*b21d(imin+1)
*
*        Compute THETA(IMIN)
*
         theta( imin ) = atan2( sqrt( b21d(imin)**2+b21bulge**2 ),
     $                   sqrt( b11d(imin)**2+b11bulge**2 ) )
*
*        Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
*
*        If the column holding the bulge is negligible, the chase is
*        restarted from the shift instead of from the bulge.
*
         IF( b11d(imin)**2+b11bulge**2 .GT. thresh**2 ) THEN
            CALL dlartgp( b11bulge, b11d(imin), rwork(iu1sn+imin-1),
     $                    rwork(iu1cs+imin-1), r )
         ELSE IF( mu .LE. nu ) THEN
            CALL dlartgs( b11e( imin ), b11d( imin + 1 ), mu,
     $                    rwork(iu1cs+imin-1), rwork(iu1sn+imin-1) )
         ELSE
            CALL dlartgs( b12d( imin ), b12e( imin ), nu,
     $                    rwork(iu1cs+imin-1), rwork(iu1sn+imin-1) )
         END IF
         IF( b21d(imin)**2+b21bulge**2 .GT. thresh**2 ) THEN
            CALL dlartgp( b21bulge, b21d(imin), rwork(iu2sn+imin-1),
     $                    rwork(iu2cs+imin-1), r )
         ELSE IF( nu .LT. mu ) THEN
            CALL dlartgs( b21e( imin ), b21d( imin + 1 ), nu,
     $                    rwork(iu2cs+imin-1), rwork(iu2sn+imin-1) )
         ELSE
            CALL dlartgs( b22d(imin), b22e(imin), mu,
     $                    rwork(iu2cs+imin-1), rwork(iu2sn+imin-1) )
         END IF
         rwork(iu2cs+imin-1) = -rwork(iu2cs+imin-1)
         rwork(iu2sn+imin-1) = -rwork(iu2sn+imin-1)
*
         temp = rwork(iu1cs+imin-1)*b11e(imin) +
     $          rwork(iu1sn+imin-1)*b11d(imin+1)
         b11d(imin+1) = rwork(iu1cs+imin-1)*b11d(imin+1) -
     $                  rwork(iu1sn+imin-1)*b11e(imin)
         b11e(imin) = temp
         IF( imax .GT. imin+1 ) THEN
            b11bulge = rwork(iu1sn+imin-1)*b11e(imin+1)
            b11e(imin+1) = rwork(iu1cs+imin-1)*b11e(imin+1)
         END IF
         temp = rwork(iu1cs+imin-1)*b12d(imin) +
     $          rwork(iu1sn+imin-1)*b12e(imin)
         b12e(imin) = rwork(iu1cs+imin-1)*b12e(imin) -
     $                rwork(iu1sn+imin-1)*b12d(imin)
         b12d(imin) = temp
         b12bulge = rwork(iu1sn+imin-1)*b12d(imin+1)
         b12d(imin+1) = rwork(iu1cs+imin-1)*b12d(imin+1)
         temp = rwork(iu2cs+imin-1)*b21e(imin) +
     $          rwork(iu2sn+imin-1)*b21d(imin+1)
         b21d(imin+1) = rwork(iu2cs+imin-1)*b21d(imin+1) -
     $                  rwork(iu2sn+imin-1)*b21e(imin)
         b21e(imin) = temp
         IF( imax .GT. imin+1 ) THEN
            b21bulge = rwork(iu2sn+imin-1)*b21e(imin+1)
            b21e(imin+1) = rwork(iu2cs+imin-1)*b21e(imin+1)
         END IF
         temp = rwork(iu2cs+imin-1)*b22d(imin) +
     $          rwork(iu2sn+imin-1)*b22e(imin)
         b22e(imin) = rwork(iu2cs+imin-1)*b22e(imin) -
     $                rwork(iu2sn+imin-1)*b22d(imin)
         b22d(imin) = temp
         b22bulge = rwork(iu2sn+imin-1)*b22d(imin+1)
         b22d(imin+1) = rwork(iu2cs+imin-1)*b22d(imin+1)
*
*        Inner loop: chase bulges from B11(IMIN,IMIN+2),
*        B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
*        bottom-right
*
         DO i = imin+1, imax-1
*
*           Compute PHI(I-1)
*
            x1 = sin(theta(i-1))*b11e(i-1) + cos(theta(i-1))*b21e(i-1)
            x2 = sin(theta(i-1))*b11bulge + cos(theta(i-1))*b21bulge
            y1 = sin(theta(i-1))*b12d(i-1) + cos(theta(i-1))*b22d(i-1)
            y2 = sin(theta(i-1))*b12bulge + cos(theta(i-1))*b22bulge
*
            phi(i-1) = atan2( sqrt(x1**2+x2**2), sqrt(y1**2+y2**2) )
*
*           Determine if there are bulges to chase or if a new direct
*           summand has been reached
*
            restart11 = b11e(i-1)**2 + b11bulge**2 .LE. thresh**2
            restart21 = b21e(i-1)**2 + b21bulge**2 .LE. thresh**2
            restart12 = b12d(i-1)**2 + b12bulge**2 .LE. thresh**2
            restart22 = b22d(i-1)**2 + b22bulge**2 .LE. thresh**2
*
*           If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
*           B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
*           chasing by applying the original shift again.
*
            IF( .NOT. restart11 .AND. .NOT. restart21 ) THEN
               CALL dlartgp( x2, x1, rwork(iv1tsn+i-1),
     $                       rwork(iv1tcs+i-1), r )
            ELSE IF( .NOT. restart11 .AND. restart21 ) THEN
               CALL dlartgp( b11bulge, b11e(i-1), rwork(iv1tsn+i-1),
     $                       rwork(iv1tcs+i-1), r )
            ELSE IF( restart11 .AND. .NOT. restart21 ) THEN
               CALL dlartgp( b21bulge, b21e(i-1), rwork(iv1tsn+i-1),
     $                       rwork(iv1tcs+i-1), r )
            ELSE IF( mu .LE. nu ) THEN
               CALL dlartgs( b11d(i), b11e(i), mu, rwork(iv1tcs+i-1),
     $                       rwork(iv1tsn+i-1) )
            ELSE
               CALL dlartgs( b21d(i), b21e(i), nu, rwork(iv1tcs+i-1),
     $                       rwork(iv1tsn+i-1) )
            END IF
            rwork(iv1tcs+i-1) = -rwork(iv1tcs+i-1)
            rwork(iv1tsn+i-1) = -rwork(iv1tsn+i-1)
*
*           The V2T rotation of step I is stored one slot earlier
*           (index I-1) than the V1T rotation of the same step.
*
            IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
               CALL dlartgp( y2, y1, rwork(iv2tsn+i-1-1),
     $                       rwork(iv2tcs+i-1-1), r )
            ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
               CALL dlartgp( b12bulge, b12d(i-1), rwork(iv2tsn+i-1-1),
     $                       rwork(iv2tcs+i-1-1), r )
            ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
               CALL dlartgp( b22bulge, b22d(i-1), rwork(iv2tsn+i-1-1),
     $                       rwork(iv2tcs+i-1-1), r )
            ELSE IF( nu .LT. mu ) THEN
               CALL dlartgs( b12e(i-1), b12d(i), nu,
     $                       rwork(iv2tcs+i-1-1), rwork(iv2tsn+i-1-1) )
            ELSE
               CALL dlartgs( b22e(i-1), b22d(i), mu,
     $                       rwork(iv2tcs+i-1-1), rwork(iv2tsn+i-1-1) )
            END IF
*
            temp = rwork(iv1tcs+i-1)*b11d(i) + rwork(iv1tsn+i-1)*b11e(i)
            b11e(i) = rwork(iv1tcs+i-1)*b11e(i) -
     $                rwork(iv1tsn+i-1)*b11d(i)
            b11d(i) = temp
            b11bulge = rwork(iv1tsn+i-1)*b11d(i+1)
            b11d(i+1) = rwork(iv1tcs+i-1)*b11d(i+1)
            temp = rwork(iv1tcs+i-1)*b21d(i) + rwork(iv1tsn+i-1)*b21e(i)
            b21e(i) = rwork(iv1tcs+i-1)*b21e(i) -
     $                rwork(iv1tsn+i-1)*b21d(i)
            b21d(i) = temp
            b21bulge = rwork(iv1tsn+i-1)*b21d(i+1)
            b21d(i+1) = rwork(iv1tcs+i-1)*b21d(i+1)
            temp = rwork(iv2tcs+i-1-1)*b12e(i-1) +
     $             rwork(iv2tsn+i-1-1)*b12d(i)
            b12d(i) = rwork(iv2tcs+i-1-1)*b12d(i) -
     $                rwork(iv2tsn+i-1-1)*b12e(i-1)
            b12e(i-1) = temp
            b12bulge = rwork(iv2tsn+i-1-1)*b12e(i)
            b12e(i) = rwork(iv2tcs+i-1-1)*b12e(i)
            temp = rwork(iv2tcs+i-1-1)*b22e(i-1) +
     $             rwork(iv2tsn+i-1-1)*b22d(i)
            b22d(i) = rwork(iv2tcs+i-1-1)*b22d(i) -
     $                rwork(iv2tsn+i-1-1)*b22e(i-1)
            b22e(i-1) = temp
            b22bulge = rwork(iv2tsn+i-1-1)*b22e(i)
            b22e(i) = rwork(iv2tcs+i-1-1)*b22e(i)
*
*           Compute THETA(I)
*
            x1 = cos(phi(i-1))*b11d(i) + sin(phi(i-1))*b12e(i-1)
            x2 = cos(phi(i-1))*b11bulge + sin(phi(i-1))*b12bulge
            y1 = cos(phi(i-1))*b21d(i) + sin(phi(i-1))*b22e(i-1)
            y2 = cos(phi(i-1))*b21bulge + sin(phi(i-1))*b22bulge
*
            theta(i) = atan2( sqrt(y1**2+y2**2), sqrt(x1**2+x2**2) )
*
*           Determine if there are bulges to chase or if a new direct
*           summand has been reached
*
            restart11 =   b11d(i)**2 + b11bulge**2 .LE. thresh**2
            restart12 = b12e(i-1)**2 + b12bulge**2 .LE. thresh**2
            restart21 =   b21d(i)**2 + b21bulge**2 .LE. thresh**2
            restart22 = b22e(i-1)**2 + b22bulge**2 .LE. thresh**2
*
*           If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
*           B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
*           chasing by applying the original shift again.
*
            IF( .NOT. restart11 .AND. .NOT. restart12 ) THEN
               CALL dlartgp( x2, x1, rwork(iu1sn+i-1), rwork(iu1cs+i-1),
     $                       r )
            ELSE IF( .NOT. restart11 .AND. restart12 ) THEN
               CALL dlartgp( b11bulge, b11d(i), rwork(iu1sn+i-1),
     $                       rwork(iu1cs+i-1), r )
            ELSE IF( restart11 .AND. .NOT. restart12 ) THEN
               CALL dlartgp( b12bulge, b12e(i-1), rwork(iu1sn+i-1),
     $                       rwork(iu1cs+i-1), r )
            ELSE IF( mu .LE. nu ) THEN
               CALL dlartgs( b11e(i), b11d(i+1), mu, rwork(iu1cs+i-1),
     $                       rwork(iu1sn+i-1) )
            ELSE
               CALL dlartgs( b12d(i), b12e(i), nu, rwork(iu1cs+i-1),
     $                       rwork(iu1sn+i-1) )
            END IF
*
*           The B21 restart pairs B21E(I) with the next diagonal
*           entry B21D(I+1), like the B11 restart above and the B21
*           restart at IMIN. B21E(I+1) would be the wrong entry and
*           lies outside B21E when I = Q-1.
*
            IF( .NOT. restart21 .AND. .NOT. restart22 ) THEN
               CALL dlartgp( y2, y1, rwork(iu2sn+i-1), rwork(iu2cs+i-1),
     $                       r )
            ELSE IF( .NOT. restart21 .AND. restart22 ) THEN
               CALL dlartgp( b21bulge, b21d(i), rwork(iu2sn+i-1),
     $                       rwork(iu2cs+i-1), r )
            ELSE IF( restart21 .AND. .NOT. restart22 ) THEN
               CALL dlartgp( b22bulge, b22e(i-1), rwork(iu2sn+i-1),
     $                       rwork(iu2cs+i-1), r )
            ELSE IF( nu .LT. mu ) THEN
               CALL dlartgs( b21e(i), b21d(i+1), nu, rwork(iu2cs+i-1),
     $                       rwork(iu2sn+i-1) )
            ELSE
               CALL dlartgs( b22d(i), b22e(i), mu, rwork(iu2cs+i-1),
     $                       rwork(iu2sn+i-1) )
            END IF
            rwork(iu2cs+i-1) = -rwork(iu2cs+i-1)
            rwork(iu2sn+i-1) = -rwork(iu2sn+i-1)
*
            temp = rwork(iu1cs+i-1)*b11e(i) + rwork(iu1sn+i-1)*b11d(i+1)
            b11d(i+1) = rwork(iu1cs+i-1)*b11d(i+1) -
     $                  rwork(iu1sn+i-1)*b11e(i)
            b11e(i) = temp
            IF( i .LT. imax - 1 ) THEN
               b11bulge = rwork(iu1sn+i-1)*b11e(i+1)
               b11e(i+1) = rwork(iu1cs+i-1)*b11e(i+1)
            END IF
            temp = rwork(iu2cs+i-1)*b21e(i) + rwork(iu2sn+i-1)*b21d(i+1)
            b21d(i+1) = rwork(iu2cs+i-1)*b21d(i+1) -
     $                  rwork(iu2sn+i-1)*b21e(i)
            b21e(i) = temp
            IF( i .LT. imax - 1 ) THEN
               b21bulge = rwork(iu2sn+i-1)*b21e(i+1)
               b21e(i+1) = rwork(iu2cs+i-1)*b21e(i+1)
            END IF
            temp = rwork(iu1cs+i-1)*b12d(i) + rwork(iu1sn+i-1)*b12e(i)
            b12e(i) = rwork(iu1cs+i-1)*b12e(i) -
     $                rwork(iu1sn+i-1)*b12d(i)
            b12d(i) = temp
            b12bulge = rwork(iu1sn+i-1)*b12d(i+1)
            b12d(i+1) = rwork(iu1cs+i-1)*b12d(i+1)
            temp = rwork(iu2cs+i-1)*b22d(i) + rwork(iu2sn+i-1)*b22e(i)
            b22e(i) = rwork(iu2cs+i-1)*b22e(i) -
     $                rwork(iu2sn+i-1)*b22d(i)
            b22d(i) = temp
            b22bulge = rwork(iu2sn+i-1)*b22d(i+1)
            b22d(i+1) = rwork(iu2cs+i-1)*b22d(i+1)
*
         END DO
*
*        Compute PHI(IMAX-1)
*
         x1 = sin(theta(imax-1))*b11e(imax-1) +
     $        cos(theta(imax-1))*b21e(imax-1)
         y1 = sin(theta(imax-1))*b12d(imax-1) +
     $        cos(theta(imax-1))*b22d(imax-1)
         y2 = sin(theta(imax-1))*b12bulge + cos(theta(imax-1))*b22bulge
*
         phi(imax-1) = atan2( abs(x1), sqrt(y1**2+y2**2) )
*
*        Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
*
         restart12 = b12d(imax-1)**2 + b12bulge**2 .LE. thresh**2
         restart22 = b22d(imax-1)**2 + b22bulge**2 .LE. thresh**2
*
         IF( .NOT. restart12 .AND. .NOT. restart22 ) THEN
            CALL dlartgp( y2, y1, rwork(iv2tsn+imax-1-1),
     $                    rwork(iv2tcs+imax-1-1), r )
         ELSE IF( .NOT. restart12 .AND. restart22 ) THEN
            CALL dlartgp( b12bulge, b12d(imax-1),
     $                    rwork(iv2tsn+imax-1-1),
     $                    rwork(iv2tcs+imax-1-1), r )
         ELSE IF( restart12 .AND. .NOT. restart22 ) THEN
            CALL dlartgp( b22bulge, b22d(imax-1),
     $                    rwork(iv2tsn+imax-1-1),
     $                    rwork(iv2tcs+imax-1-1), r )
         ELSE IF( nu .LT. mu ) THEN
            CALL dlartgs( b12e(imax-1), b12d(imax), nu,
     $                    rwork(iv2tcs+imax-1-1),
     $                    rwork(iv2tsn+imax-1-1) )
         ELSE
            CALL dlartgs( b22e(imax-1), b22d(imax), mu,
     $                    rwork(iv2tcs+imax-1-1),
     $                    rwork(iv2tsn+imax-1-1) )
         END IF
*
         temp = rwork(iv2tcs+imax-1-1)*b12e(imax-1) +
     $          rwork(iv2tsn+imax-1-1)*b12d(imax)
         b12d(imax) = rwork(iv2tcs+imax-1-1)*b12d(imax) -
     $                rwork(iv2tsn+imax-1-1)*b12e(imax-1)
         b12e(imax-1) = temp
         temp = rwork(iv2tcs+imax-1-1)*b22e(imax-1) +
     $          rwork(iv2tsn+imax-1-1)*b22d(imax)
         b22d(imax) = rwork(iv2tcs+imax-1-1)*b22d(imax) -
     $                rwork(iv2tsn+imax-1-1)*b22e(imax-1)
         b22e(imax-1) = temp
*
*        Update singular vectors
*
*        The rotations stored in RWORK for this sweep are applied
*        with ZLASR. For TRANS = 'T' the side and the row/column
*        roles are interchanged.
*
         IF( wantu1 ) THEN
            IF( colmajor ) THEN
               CALL zlasr( 'R', 'V', 'F', p, imax-imin+1,
     $                     rwork(iu1cs+imin-1), rwork(iu1sn+imin-1),
     $                     u1(1,imin), ldu1 )
            ELSE
               CALL zlasr( 'L', 'V', 'F', imax-imin+1, p,
     $                     rwork(iu1cs+imin-1), rwork(iu1sn+imin-1),
     $                     u1(imin,1), ldu1 )
            END IF
         END IF
         IF( wantu2 ) THEN
            IF( colmajor ) THEN
               CALL zlasr( 'R', 'V', 'F', m-p, imax-imin+1,
     $                     rwork(iu2cs+imin-1), rwork(iu2sn+imin-1),
     $                     u2(1,imin), ldu2 )
            ELSE
               CALL zlasr( 'L', 'V', 'F', imax-imin+1, m-p,
     $                     rwork(iu2cs+imin-1), rwork(iu2sn+imin-1),
     $                     u2(imin,1), ldu2 )
            END IF
         END IF
         IF( wantv1t ) THEN
            IF( colmajor ) THEN
               CALL zlasr( 'L', 'V', 'F', imax-imin+1, q,
     $                     rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1),
     $                     v1t(imin,1), ldv1t )
            ELSE
               CALL zlasr( 'R', 'V', 'F', q, imax-imin+1,
     $                     rwork(iv1tcs+imin-1), rwork(iv1tsn+imin-1),
     $                     v1t(1,imin), ldv1t )
            END IF
         END IF
         IF( wantv2t ) THEN
            IF( colmajor ) THEN
               CALL zlasr( 'L', 'V', 'F', imax-imin+1, m-q,
     $                     rwork(iv2tcs+imin-1), rwork(iv2tsn+imin-1),
     $                     v2t(imin,1), ldv2t )
            ELSE
               CALL zlasr( 'R', 'V', 'F', m-q, imax-imin+1,
     $                     rwork(iv2tcs+imin-1), rwork(iv2tsn+imin-1),
     $                     v2t(1,imin), ldv2t )
            END IF
         END IF
*
*        Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
*
         IF( b11e(imax-1)+b21e(imax-1) .GT. zero ) THEN
            b11d(imax) = -b11d(imax)
            b21d(imax) = -b21d(imax)
            IF( wantv1t ) THEN
               IF( colmajor ) THEN
                  CALL zscal( q, negonecomplex, v1t(imax,1), ldv1t )
               ELSE
                  CALL zscal( q, negonecomplex, v1t(1,imax), 1 )
               END IF
            END IF
         END IF
*
*        Compute THETA(IMAX)
*
         x1 = cos(phi(imax-1))*b11d(imax) +
     $        sin(phi(imax-1))*b12e(imax-1)
         y1 = cos(phi(imax-1))*b21d(imax) +
     $        sin(phi(imax-1))*b22e(imax-1)
*
         theta(imax) = atan2( abs(y1), abs(x1) )
*
*        Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
*        and B22(IMAX,IMAX-1)
*
         IF( b11d(imax)+b12e(imax-1) .LT. zero ) THEN
            b12d(imax) = -b12d(imax)
            IF( wantu1 ) THEN
               IF( colmajor ) THEN
                  CALL zscal( p, negonecomplex, u1(1,imax), 1 )
               ELSE
                  CALL zscal( p, negonecomplex, u1(imax,1), ldu1 )
               END IF
            END IF
         END IF
         IF( b21d(imax)+b22e(imax-1) .GT. zero ) THEN
            b22d(imax) = -b22d(imax)
            IF( wantu2 ) THEN
               IF( colmajor ) THEN
                  CALL zscal( m-p, negonecomplex, u2(1,imax), 1 )
               ELSE
                  CALL zscal( m-p, negonecomplex, u2(imax,1), ldu2 )
               END IF
            END IF
         END IF
*
*        Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
*
         IF( b12d(imax)+b22d(imax) .LT. zero ) THEN
            IF( wantv2t ) THEN
               IF( colmajor ) THEN
                  CALL zscal( m-q, negonecomplex, v2t(imax,1), ldv2t )
               ELSE
                  CALL zscal( m-q, negonecomplex, v2t(1,imax), 1 )
               END IF
            END IF
         END IF
*
*        Test for negligible sines or cosines
*
         DO i = imin, imax
            IF( theta(i) .LT. thresh ) THEN
               theta(i) = zero
            ELSE IF( theta(i) .GT. piover2-thresh ) THEN
               theta(i) = piover2
            END IF
         END DO
         DO i = imin, imax-1
            IF( phi(i) .LT. thresh ) THEN
               phi(i) = zero
            ELSE IF( phi(i) .GT. piover2-thresh ) THEN
               phi(i) = piover2
            END IF
         END DO
*
*        Deflate
*
*        IMAX is lowered past zero entries of PHI. IMIN is then
*        pulled below IMAX and lowered while PHI(IMIN-1) is nonzero.
*        The explicit guards keep PHI(0) from being referenced.
*
         IF (imax .GT. 1) THEN
            DO WHILE( phi(imax-1) .EQ. zero )
               imax = imax - 1
               IF (imax .LE. 1) EXIT
            END DO
         END IF
         IF( imin .GT. imax - 1 )
     $      imin = imax - 1
         IF (imin .GT. 1) THEN
            DO WHILE (phi(imin-1) .NE. zero)
                imin = imin - 1
                IF (imin .LE. 1) EXIT
            END DO
         END IF
*
*        Repeat main iteration loop
*
      END DO
*
*     Postprocessing: order THETA from least to greatest
*
*     Selection sort on THETA. Whenever two angles trade places, the
*     corresponding vectors of the requested U1, U2, V1T and V2T are
*     swapped as well: columns of U1, U2 and rows of V1T, V2T in
*     column-major storage, the opposite for TRANS = 'T'.
*
      DO i = 1, q
*
*        Locate the first smallest angle in THETA(I:Q)
*
         mini = i
         DO j = i+1, q
            IF( theta(j) .LT. theta(mini) ) mini = j
         END DO
*
*        Nothing to move if THETA(I) is already the smallest
*
         IF( mini .EQ. i ) CYCLE
*
         thetamin = theta(mini)
         theta(mini) = theta(i)
         theta(i) = thetamin
*
         IF( wantu1 ) THEN
            IF( colmajor ) THEN
               CALL zswap( p, u1(1,i), 1, u1(1,mini), 1 )
            ELSE
               CALL zswap( p, u1(i,1), ldu1, u1(mini,1), ldu1 )
            END IF
         END IF
         IF( wantu2 ) THEN
            IF( colmajor ) THEN
               CALL zswap( m-p, u2(1,i), 1, u2(1,mini), 1 )
            ELSE
               CALL zswap( m-p, u2(i,1), ldu2, u2(mini,1), ldu2 )
            END IF
         END IF
         IF( wantv1t ) THEN
            IF( colmajor ) THEN
               CALL zswap( q, v1t(i,1), ldv1t, v1t(mini,1), ldv1t )
            ELSE
               CALL zswap( q, v1t(1,i), 1, v1t(1,mini), 1 )
            END IF
         END IF
         IF( wantv2t ) THEN
            IF( colmajor ) THEN
               CALL zswap( m-q, v2t(i,1), ldv2t, v2t(mini,1), ldv2t )
            ELSE
               CALL zswap( m-q, v2t(1,i), 1, v2t(1,mini), 1 )
            END IF
         END IF
*
      END DO
*
      RETURN
*
*     End of ZBBCSD
*

Here is the call graph for this function:

Here is the caller graph for this function: