◆ cgesvd()

subroutine cgesvd	(	character	jobu,
		character	jobvt,
		integer	m,
		integer	n,
		complex, dimension( lda, * )	a,
		integer	lda,
		real, dimension( * )	s,
		complex, dimension( ldu, * )	u,
		integer	ldu,
		complex, dimension( ldvt, * )	vt,
		integer	ldvt,
		complex, dimension( * )	work,
		integer	lwork,
		real, dimension( * )	rwork,
		integer	info )

CGESVD computes the singular value decomposition (SVD) for GE matrices

Download CGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:

!>
!> CGESVD computes the singular value decomposition (SVD) of a complex
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors. The SVD is written
!>
!>      A = U * SIGMA * conjugate-transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
!> V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns V**H, not V.
!>

Parameters

[in]	JOBU	!> JOBU is CHARACTER*1 !> Specifies options for computing all or part of the matrix U: !> = 'A': all M columns of U are returned in array U; !> = 'S': the first min(m,n) columns of U (the left singular !> vectors) are returned in the array U; !> = 'O': the first min(m,n) columns of U (the left singular !> vectors) are overwritten on the array A; !> = 'N': no columns of U (no left singular vectors) are !> computed. !>
[in]	JOBVT	!> JOBVT is CHARACTER*1 !> Specifies options for computing all or part of the matrix !> V**H: !> = 'A': all N rows of V**H are returned in the array VT; !> = 'S': the first min(m,n) rows of V**H (the right singular !> vectors) are returned in the array VT; !> = 'O': the first min(m,n) rows of V**H (the right singular !> vectors) are overwritten on the array A; !> = 'N': no rows of V**H (no right singular vectors) are !> computed. !> !> JOBVT and JOBU cannot both be 'O'. !>
[in]	M	!> M is INTEGER !> The number of rows of the input matrix A. M >= 0. !>
[in]	N	!> N is INTEGER !> The number of columns of the input matrix A. N >= 0. !>
[in,out]	A	!> A is COMPLEX array, dimension (LDA,N) !> On entry, the M-by-N matrix A. !> On exit, !> if JOBU = 'O', A is overwritten with the first min(m,n) !> columns of U (the left singular vectors, !> stored columnwise); !> if JOBVT = 'O', A is overwritten with the first min(m,n) !> rows of V**H (the right singular vectors, !> stored rowwise); !> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A !> are destroyed. !>
[in]	LDA	!> LDA is INTEGER !> The leading dimension of the array A. LDA >= max(1,M). !>
[out]	S	!> S is REAL array, dimension (min(M,N)) !> The singular values of A, sorted so that S(i) >= S(i+1). !>
[out]	U	!> U is COMPLEX array, dimension (LDU,UCOL) !> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'. !> If JOBU = 'A', U contains the M-by-M unitary matrix U; !> if JOBU = 'S', U contains the first min(m,n) columns of U !> (the left singular vectors, stored columnwise); !> if JOBU = 'N' or 'O', U is not referenced. !>
[in]	LDU	!> LDU is INTEGER !> The leading dimension of the array U. LDU >= 1; if !> JOBU = 'S' or 'A', LDU >= M. !>
[out]	VT	!> VT is COMPLEX array, dimension (LDVT,N) !> If JOBVT = 'A', VT contains the N-by-N unitary matrix !> V**H; !> if JOBVT = 'S', VT contains the first min(m,n) rows of !> V**H (the right singular vectors, stored rowwise); !> if JOBVT = 'N' or 'O', VT is not referenced. !>
[in]	LDVT	!> LDVT is INTEGER !> The leading dimension of the array VT. LDVT >= 1; if !> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N). !>
[out]	WORK	!> WORK is COMPLEX array, dimension (MAX(1,LWORK)) !> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. !>
[in]	LWORK	!> LWORK is INTEGER !> The dimension of the array WORK. !> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)). !> For good performance, LWORK should generally be larger. !> !> If LWORK = -1, then a workspace query is assumed; the routine !> only calculates the optimal size of the WORK array, returns !> this value as the first entry of the WORK array, and no error !> message related to LWORK is issued by XERBLA. !>
[out]	RWORK	!> RWORK is REAL array, dimension (5*min(M,N)) !> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the !> unconverged superdiagonal elements of an upper bidiagonal !> matrix B whose diagonal is in S (not necessarily sorted). !> B satisfies A = U * B * VT, so it has the same singular !> values as A, and singular vectors related by U and VT. !>
[out]	INFO	!> INFO is INTEGER !> = 0: successful exit. !> < 0: if INFO = -i, the i-th argument had an illegal value. !> > 0: if CBDSQR did not converge, INFO specifies how many !> superdiagonals of an intermediate bidiagonal form B !> did not converge to zero. See the description of RWORK !> above for details. !>

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Definition at line 210 of file cgesvd.f.

*
*  -- LAPACK driver routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER          JOBU, JOBVT
      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
*     ..
*     .. Array Arguments ..
      REAL               RWORK( * ), S( * )
      COMPLEX            A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
     $                   WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      COMPLEX            CZERO, CONE
      parameter( czero = ( 0.0e0, 0.0e0 ),
     $                   cone = ( 1.0e0, 0.0e0 ) )
      REAL               ZERO, ONE
      parameter( zero = 0.0e0, one = 1.0e0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
      INTEGER            BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
     $                   NRVT, WRKBL
      INTEGER            LWORK_CGEQRF, LWORK_CUNGQR_N, LWORK_CUNGQR_M,
     $                   LWORK_CGEBRD, LWORK_CUNGBR_P, LWORK_CUNGBR_Q,
     $                   LWORK_CGELQF, LWORK_CUNGLQ_N, LWORK_CUNGLQ_M
      REAL               ANRM, BIGNUM, EPS, SMLNUM
*     ..
*     .. Local Arrays ..
      REAL               DUM( 1 )
      COMPLEX            CDUM( 1 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           cbdsqr, cgebrd, cgelqf, cgemm, cgeqrf,
     $                   clacpy,
     $                   clascl, claset, cungbr, cunglq, cungqr, cunmbr,
     $                   slascl, xerbla
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      REAL               CLANGE, SLAMCH, SROUNDUP_LWORK
      EXTERNAL           lsame, ilaenv, clange, slamch,
     $                   sroundup_lwork
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, sqrt
*     ..
*     .. Executable Statements ..
*
*     Test the input arguments
*
      info = 0
      minmn = min( m, n )
      wntua = lsame( jobu, 'A' )
      wntus = lsame( jobu, 'S' )
      wntuas = wntua .OR. wntus
      wntuo = lsame( jobu, 'O' )
      wntun = lsame( jobu, 'N' )
      wntva = lsame( jobvt, 'A' )
      wntvs = lsame( jobvt, 'S' )
      wntvas = wntva .OR. wntvs
      wntvo = lsame( jobvt, 'O' )
      wntvn = lsame( jobvt, 'N' )
      lquery = ( lwork.EQ.-1 )
*
      IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
         info = -1
      ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
     $         ( wntvo .AND. wntuo ) ) THEN
         info = -2
      ELSE IF( m.LT.0 ) THEN
         info = -3
      ELSE IF( n.LT.0 ) THEN
         info = -4
      ELSE IF( lda.LT.max( 1, m ) ) THEN
         info = -6
      ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
         info = -9
      ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
     $         ( wntvs .AND. ldvt.LT.minmn ) ) THEN
         info = -11
      END IF
*
*     Compute workspace
*      (Note: Comments in the code beginning "Workspace:" describe the
*       minimal amount of workspace needed at that point in the code,
*       as well as the preferred amount for good performance.
*       CWorkspace refers to complex workspace, and RWorkspace to
*       real workspace. NB refers to the optimal block size for the
*       immediately following subroutine, as returned by ILAENV.)
*
      IF( info.EQ.0 ) THEN
         minwrk = 1
         maxwrk = 1
         IF( m.GE.n .AND. minmn.GT.0 ) THEN
*
*           Space needed for CBDSQR is BDSPAC = 5*N
*
            mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
*           Compute space needed for CGEQRF
            CALL cgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
            lwork_cgeqrf = int( cdum(1) )
*           Compute space needed for CUNGQR
            CALL cungqr( m, n, n, a, lda, cdum(1), cdum(1), -1,
     $                   ierr )
            lwork_cungqr_n = int( cdum(1) )
            CALL cungqr( m, m, n, a, lda, cdum(1), cdum(1), -1,
     $                   ierr )
            lwork_cungqr_m = int( cdum(1) )
*           Compute space needed for CGEBRD
            CALL cgebrd( n, n, a, lda, s, dum(1), cdum(1),
     $                   cdum(1), cdum(1), -1, ierr )
            lwork_cgebrd = int( cdum(1) )
*           Compute space needed for CUNGBR
            CALL cungbr( 'P', n, n, n, a, lda, cdum(1),
     $                   cdum(1), -1, ierr )
            lwork_cungbr_p = int( cdum(1) )
            CALL cungbr( 'Q', n, n, n, a, lda, cdum(1),
     $                   cdum(1), -1, ierr )
            lwork_cungbr_q = int( cdum(1) )
*
            mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
            IF( m.GE.mnthr ) THEN
               IF( wntun ) THEN
*
*                 Path 1 (M much larger than N, JOBU='N')
*
                  maxwrk = n + lwork_cgeqrf
                  maxwrk = max( maxwrk, 2*n+lwork_cgebrd )
                  IF( wntvo .OR. wntvas )
     $               maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
                  minwrk = 3*n
               ELSE IF( wntuo .AND. wntvn ) THEN
*
*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_n )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  maxwrk = max( n*n+wrkbl, n*n+m*n )
                  minwrk = 2*n + m
               ELSE IF( wntuo .AND. wntvas ) THEN
*
*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
*                 'A')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_n )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
                  maxwrk = max( n*n+wrkbl, n*n+m*n )
                  minwrk = 2*n + m
               ELSE IF( wntus .AND. wntvn ) THEN
*
*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_n )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  maxwrk = n*n + wrkbl
                  minwrk = 2*n + m
               ELSE IF( wntus .AND. wntvo ) THEN
*
*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_n )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
                  maxwrk = 2*n*n + wrkbl
                  minwrk = 2*n + m
               ELSE IF( wntus .AND. wntvas ) THEN
*
*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
*                 'A')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_n )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
                  maxwrk = n*n + wrkbl
                  minwrk = 2*n + m
               ELSE IF( wntua .AND. wntvn ) THEN
*
*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_m )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  maxwrk = n*n + wrkbl
                  minwrk = 2*n + m
               ELSE IF( wntua .AND. wntvo ) THEN
*
*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_m )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
                  maxwrk = 2*n*n + wrkbl
                  minwrk = 2*n + m
               ELSE IF( wntua .AND. wntvas ) THEN
*
*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
*                 'A')
*
                  wrkbl = n + lwork_cgeqrf
                  wrkbl = max( wrkbl, n+lwork_cungqr_m )
                  wrkbl = max( wrkbl, 2*n+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_q )
                  wrkbl = max( wrkbl, 2*n+lwork_cungbr_p )
                  maxwrk = n*n + wrkbl
                  minwrk = 2*n + m
               END IF
            ELSE
*
*              Path 10 (M at least N, but not much larger)
*
               CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
     $                   cdum(1), cdum(1), -1, ierr )
               lwork_cgebrd = int( cdum(1) )
               maxwrk = 2*n + lwork_cgebrd
               IF( wntus .OR. wntuo ) THEN
                  CALL cungbr( 'Q', m, n, n, a, lda, cdum(1),
     $                   cdum(1), -1, ierr )
                  lwork_cungbr_q = int( cdum(1) )
                  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
               END IF
               IF( wntua ) THEN
                  CALL cungbr( 'Q', m, m, n, a, lda, cdum(1),
     $                   cdum(1), -1, ierr )
                  lwork_cungbr_q = int( cdum(1) )
                  maxwrk = max( maxwrk, 2*n+lwork_cungbr_q )
               END IF
               IF( .NOT.wntvn ) THEN
                  maxwrk = max( maxwrk, 2*n+lwork_cungbr_p )
               END IF
               minwrk = 2*n + m
            END IF
         ELSE IF( minmn.GT.0 ) THEN
*
*           Space needed for CBDSQR is BDSPAC = 5*M
*
            mnthr = ilaenv( 6, 'CGESVD', jobu // jobvt, m, n, 0, 0 )
*           Compute space needed for CGELQF
            CALL cgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
            lwork_cgelqf = int( cdum(1) )
*           Compute space needed for CUNGLQ
            CALL cunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
     $                   ierr )
            lwork_cunglq_n = int( cdum(1) )
            CALL cunglq( m, n, m, a, lda, cdum(1), cdum(1), -1,
     $                   ierr )
            lwork_cunglq_m = int( cdum(1) )
*           Compute space needed for CGEBRD
            CALL cgebrd( m, m, a, lda, s, dum(1), cdum(1),
     $                   cdum(1), cdum(1), -1, ierr )
            lwork_cgebrd = int( cdum(1) )
*            Compute space needed for CUNGBR P
            CALL cungbr( 'P', m, m, m, a, n, cdum(1),
     $                   cdum(1), -1, ierr )
            lwork_cungbr_p = int( cdum(1) )
*           Compute space needed for CUNGBR Q
            CALL cungbr( 'Q', m, m, m, a, n, cdum(1),
     $                   cdum(1), -1, ierr )
            lwork_cungbr_q = int( cdum(1) )
            IF( n.GE.mnthr ) THEN
               IF( wntvn ) THEN
*
*                 Path 1t(N much larger than M, JOBVT='N')
*
                  maxwrk = m + lwork_cgelqf
                  maxwrk = max( maxwrk, 2*m+lwork_cgebrd )
                  IF( wntuo .OR. wntuas )
     $               maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
                  minwrk = 3*m
               ELSE IF( wntvo .AND. wntun ) THEN
*
*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_m )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  maxwrk = max( m*m+wrkbl, m*m+m*n )
                  minwrk = 2*m + n
               ELSE IF( wntvo .AND. wntuas ) THEN
*
*                 Path 3t(N much larger than M, JOBU='S' or 'A',
*                 JOBVT='O')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_m )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
                  maxwrk = max( m*m+wrkbl, m*m+m*n )
                  minwrk = 2*m + n
               ELSE IF( wntvs .AND. wntun ) THEN
*
*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_m )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  maxwrk = m*m + wrkbl
                  minwrk = 2*m + n
               ELSE IF( wntvs .AND. wntuo ) THEN
*
*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_m )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
                  maxwrk = 2*m*m + wrkbl
                  minwrk = 2*m + n
               ELSE IF( wntvs .AND. wntuas ) THEN
*
*                 Path 6t(N much larger than M, JOBU='S' or 'A',
*                 JOBVT='S')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_m )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
                  maxwrk = m*m + wrkbl
                  minwrk = 2*m + n
               ELSE IF( wntva .AND. wntun ) THEN
*
*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_n )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  maxwrk = m*m + wrkbl
                  minwrk = 2*m + n
               ELSE IF( wntva .AND. wntuo ) THEN
*
*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_n )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
                  maxwrk = 2*m*m + wrkbl
                  minwrk = 2*m + n
               ELSE IF( wntva .AND. wntuas ) THEN
*
*                 Path 9t(N much larger than M, JOBU='S' or 'A',
*                 JOBVT='A')
*
                  wrkbl = m + lwork_cgelqf
                  wrkbl = max( wrkbl, m+lwork_cunglq_n )
                  wrkbl = max( wrkbl, 2*m+lwork_cgebrd )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_p )
                  wrkbl = max( wrkbl, 2*m+lwork_cungbr_q )
                  maxwrk = m*m + wrkbl
                  minwrk = 2*m + n
               END IF
            ELSE
*
*              Path 10t(N greater than M, but not much larger)
*
               CALL cgebrd( m, n, a, lda, s, dum(1), cdum(1),
     $                   cdum(1), cdum(1), -1, ierr )
               lwork_cgebrd = int( cdum(1) )
               maxwrk = 2*m + lwork_cgebrd
               IF( wntvs .OR. wntvo ) THEN
*                Compute space needed for CUNGBR P
                 CALL cungbr( 'P', m, n, m, a, n, cdum(1),
     $                   cdum(1), -1, ierr )
                 lwork_cungbr_p = int( cdum(1) )
                 maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
               END IF
               IF( wntva ) THEN
                 CALL cungbr( 'P', n,  n, m, a, n, cdum(1),
     $                   cdum(1), -1, ierr )
                 lwork_cungbr_p = int( cdum(1) )
                 maxwrk = max( maxwrk, 2*m+lwork_cungbr_p )
               END IF
               IF( .NOT.wntun ) THEN
                  maxwrk = max( maxwrk, 2*m+lwork_cungbr_q )
               END IF
               minwrk = 2*m + n
            END IF
         END IF
         maxwrk = max( minwrk, maxwrk )
         work( 1 ) = sroundup_lwork(maxwrk)
*
         IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
            info = -13
         END IF
      END IF
*
      IF( info.NE.0 ) THEN
         CALL xerbla( 'CGESVD', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( m.EQ.0 .OR. n.EQ.0 ) THEN
         RETURN
      END IF
*
*     Get machine constants
*
      eps = slamch( 'P' )
      smlnum = sqrt( slamch( 'S' ) ) / eps
      bignum = one / smlnum
*
*     Scale A if max element outside range [SMLNUM,BIGNUM]
*
      anrm = clange( 'M', m, n, a, lda, dum )
      iscl = 0
      IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
         iscl = 1
         CALL clascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
      ELSE IF( anrm.GT.bignum ) THEN
         iscl = 1
         CALL clascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
      END IF
*
      IF( m.GE.n ) THEN
*
*        A has at least as many rows as columns. If A has sufficiently
*        more rows than columns, first reduce using the QR
*        decomposition (if sufficient workspace available)
*
         IF( m.GE.mnthr ) THEN
*
            IF( wntun ) THEN
*
*              Path 1 (M much larger than N, JOBU='N')
*              No left singular vectors to be computed
*
               itau = 1
               iwork = itau + n
*
*              Compute A=Q*R
*              (CWorkspace: need 2*N, prefer N+N*NB)
*              (RWorkspace: need 0)
*
               CALL cgeqrf( m, n, a, lda, work( itau ),
     $                      work( iwork ),
     $                      lwork-iwork+1, ierr )
*
*              Zero out below R
*
               IF( n .GT. 1 ) THEN
                  CALL claset( 'L', n-1, n-1, czero, czero, a( 2,
     $                         1 ),
     $                         lda )
               END IF
               ie = 1
               itauq = 1
               itaup = itauq + n
               iwork = itaup + n
*
*              Bidiagonalize R in A
*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*              (RWorkspace: need N)
*
               CALL cgebrd( n, n, a, lda, s, rwork( ie ),
     $                      work( itauq ),
     $                      work( itaup ), work( iwork ), lwork-iwork+1,
     $                      ierr )
               ncvt = 0
               IF( wntvo .OR. wntvas ) THEN
*
*                 If right singular vectors desired, generate P'.
*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ncvt = n
               END IF
               irwork = ie + n
*
*              Perform bidiagonal QR iteration, computing right
*              singular vectors of A in A if desired
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a,
     $                      lda,
     $                      cdum, 1, cdum, 1, rwork( irwork ), info )
*
*              If right singular vectors desired in VT, copy them there
*
               IF( wntvas )
     $            CALL clacpy( 'F', n, n, a, lda, vt, ldvt )
*
            ELSE IF( wntuo .AND. wntvn ) THEN
*
*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
*              N left singular vectors to be overwritten on A and
*              no right singular vectors to be computed
*
               IF( lwork.GE.n*n+3*n ) THEN
*
*                 Sufficient workspace for a fast algorithm
*
                  ir = 1
                  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
*
*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
*
                     ldwrku = lda
                     ldwrkr = lda
                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
*
*                    WORK(IU) is LDA by N, WORK(IR) is N by N
*
                     ldwrku = lda
                     ldwrkr = n
                  ELSE
*
*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
*
                     ldwrku = ( lwork-n*n ) / n
                     ldwrkr = n
                  END IF
                  itau = ir + ldwrkr*n
                  iwork = itau + n
*
*                 Compute A=Q*R
*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cgeqrf( m, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Copy R to WORK(IR) and zero out below it
*
                  CALL clacpy( 'U', n, n, a, lda, work( ir ),
     $                         ldwrkr )
                  CALL claset( 'L', n-1, n-1, czero, czero,
     $                         work( ir+1 ), ldwrkr )
*
*                 Generate Q in A
*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cungqr( m, n, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ie = 1
                  itauq = itau
                  itaup = itauq + n
                  iwork = itaup + n
*
*                 Bidiagonalize R in WORK(IR)
*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
*                 (RWorkspace: need N)
*
                  CALL cgebrd( n, n, work( ir ), ldwrkr, s,
     $                         rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Generate left vectors bidiagonalizing R
*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*                 (RWorkspace: need 0)
*
                  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
     $                         work( itauq ), work( iwork ),
     $                         lwork-iwork+1, ierr )
                  irwork = ie + n
*
*                 Perform bidiagonal QR iteration, computing left
*                 singular vectors of R in WORK(IR)
*                 (CWorkspace: need N*N)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
     $                         1,
     $                         work( ir ), ldwrkr, cdum, 1,
     $                         rwork( irwork ), info )
                  iu = itauq
*
*                 Multiply Q in A by left singular vectors of R in
*                 WORK(IR), storing result in WORK(IU) and copying to A
*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
*                 (RWorkspace: 0)
*
                  DO 10 i = 1, m, ldwrku
                     chunk = min( m-i+1, ldwrku )
                     CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i,
     $                           1 ),
     $                           lda, work( ir ), ldwrkr, czero,
     $                           work( iu ), ldwrku )
                     CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
     $                            a( i, 1 ), lda )
   10             CONTINUE
*
               ELSE
*
*                 Insufficient workspace for a fast algorithm
*
                  ie = 1
                  itauq = 1
                  itaup = itauq + n
                  iwork = itaup + n
*
*                 Bidiagonalize A
*                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
*                 (RWorkspace: N)
*
                  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Generate left vectors bidiagonalizing A
*                 (CWorkspace: need 3*N, prefer 2*N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  irwork = ie + n
*
*                 Perform bidiagonal QR iteration, computing left
*                 singular vectors of A in A
*                 (CWorkspace: need 0)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
     $                         1,
     $                         a, lda, cdum, 1, rwork( irwork ), info )
*
               END IF
*
            ELSE IF( wntuo .AND. wntvas ) THEN
*
*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
*              N left singular vectors to be overwritten on A and
*              N right singular vectors to be computed in VT
*
               IF( lwork.GE.n*n+3*n ) THEN
*
*                 Sufficient workspace for a fast algorithm
*
                  ir = 1
                  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
*
*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
*
                     ldwrku = lda
                     ldwrkr = lda
                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
*
*                    WORK(IU) is LDA by N and WORK(IR) is N by N
*
                     ldwrku = lda
                     ldwrkr = n
                  ELSE
*
*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
*
                     ldwrku = ( lwork-n*n ) / n
                     ldwrkr = n
                  END IF
                  itau = ir + ldwrkr*n
                  iwork = itau + n
*
*                 Compute A=Q*R
*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cgeqrf( m, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Copy R to VT, zeroing out below it
*
                  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
                  IF( n.GT.1 )
     $               CALL claset( 'L', n-1, n-1, czero, czero,
     $                            vt( 2, 1 ), ldvt )
*
*                 Generate Q in A
*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cungqr( m, n, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ie = 1
                  itauq = itau
                  itaup = itauq + n
                  iwork = itaup + n
*
*                 Bidiagonalize R in VT, copying result to WORK(IR)
*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
*                 (RWorkspace: need N)
*
                  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  CALL clacpy( 'L', n, n, vt, ldvt, work( ir ),
     $                         ldwrkr )
*
*                 Generate left vectors bidiagonalizing R in WORK(IR)
*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
     $                         work( itauq ), work( iwork ),
     $                         lwork-iwork+1, ierr )
*
*                 Generate right vectors bidiagonalizing R in VT
*                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  irwork = ie + n
*
*                 Perform bidiagonal QR iteration, computing left
*                 singular vectors of R in WORK(IR) and computing right
*                 singular vectors of R in VT
*                 (CWorkspace: need N*N)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
     $                         ldvt, work( ir ), ldwrkr, cdum, 1,
     $                         rwork( irwork ), info )
                  iu = itauq
*
*                 Multiply Q in A by left singular vectors of R in
*                 WORK(IR), storing result in WORK(IU) and copying to A
*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
*                 (RWorkspace: 0)
*
                  DO 20 i = 1, m, ldwrku
                     chunk = min( m-i+1, ldwrku )
                     CALL cgemm( 'N', 'N', chunk, n, n, cone, a( i,
     $                           1 ),
     $                           lda, work( ir ), ldwrkr, czero,
     $                           work( iu ), ldwrku )
                     CALL clacpy( 'F', chunk, n, work( iu ), ldwrku,
     $                            a( i, 1 ), lda )
   20             CONTINUE
*
               ELSE
*
*                 Insufficient workspace for a fast algorithm
*
                  itau = 1
                  iwork = itau + n
*
*                 Compute A=Q*R
*                 (CWorkspace: need 2*N, prefer N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cgeqrf( m, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Copy R to VT, zeroing out below it
*
                  CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
                  IF( n.GT.1 )
     $               CALL claset( 'L', n-1, n-1, czero, czero,
     $                            vt( 2, 1 ), ldvt )
*
*                 Generate Q in A
*                 (CWorkspace: need 2*N, prefer N+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cungqr( m, n, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ie = 1
                  itauq = itau
                  itaup = itauq + n
                  iwork = itaup + n
*
*                 Bidiagonalize R in VT
*                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                 (RWorkspace: need N)
*
                  CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Multiply Q in A by left vectors bidiagonalizing R
*                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
     $                         work( itauq ), a, lda, work( iwork ),
     $                         lwork-iwork+1, ierr )
*
*                 Generate right vectors bidiagonalizing R in VT
*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  irwork = ie + n
*
*                 Perform bidiagonal QR iteration, computing left
*                 singular vectors of A in A and computing right
*                 singular vectors of A in VT
*                 (CWorkspace: 0)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
     $                         ldvt, a, lda, cdum, 1, rwork( irwork ),
     $                         info )
*
               END IF
*
            ELSE IF( wntus ) THEN
*
               IF( wntvn ) THEN
*
*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
*                 N left singular vectors to be computed in U and
*                 no right singular vectors to be computed
*
                  IF( lwork.GE.n*n+3*n ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     ir = 1
                     IF( lwork.GE.wrkbl+lda*n ) THEN
*
*                       WORK(IR) is LDA by N
*
                        ldwrkr = lda
                     ELSE
*
*                       WORK(IR) is N by N
*
                        ldwrkr = n
                     END IF
                     itau = ir + ldwrkr*n
                     iwork = itau + n
*
*                    Compute A=Q*R
*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R to WORK(IR), zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, work( ir ),
     $                            ldwrkr )
                     CALL claset( 'L', n-1, n-1, czero, czero,
     $                            work( ir+1 ), ldwrkr )
*
*                    Generate Q in A
*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, n, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in WORK(IR)
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, work( ir ), ldwrkr, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate left vectors bidiagonalizing R in WORK(IR)
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of R in WORK(IR)
*                    (CWorkspace: need N*N)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ),
     $                            cdum,
     $                            1, work( ir ), ldwrkr, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply Q in A by left singular vectors of R in
*                    WORK(IR), storing result in U
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
     $                           work( ir ), ldwrkr, czero, u, ldu )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, n, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Zero out below R in A
*
                     IF( n .GT. 1 ) THEN
                        CALL claset( 'L', n-1, n-1, czero, czero,
     $                               a( 2, 1 ), lda )
                     END IF
*
*                    Bidiagonalize R in A
*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply Q in U by left vectors bidiagonalizing R
*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
     $                            work( itauq ), u, ldu, work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ),
     $                            cdum,
     $                            1, u, ldu, cdum, 1, rwork( irwork ),
     $                            info )
*
                  END IF
*
               ELSE IF( wntvo ) THEN
*
*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
*                 N left singular vectors to be computed in U and
*                 N right singular vectors to be overwritten on A
*
                  IF( lwork.GE.2*n*n+3*n ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+2*lda*n ) THEN
*
*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
*
                        ldwrku = lda
                        ir = iu + ldwrku*n
                        ldwrkr = lda
                     ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
*
*                       WORK(IU) is LDA by N and WORK(IR) is N by N
*
                        ldwrku = lda
                        ir = iu + ldwrku*n
                        ldwrkr = n
                     ELSE
*
*                       WORK(IU) is N by N and WORK(IR) is N by N
*
                        ldwrku = n
                        ir = iu + ldwrku*n
                        ldwrkr = n
                     END IF
                     itau = ir + ldwrkr*n
                     iwork = itau + n
*
*                    Compute A=Q*R
*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R to WORK(IU), zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'L', n-1, n-1, czero, czero,
     $                            work( iu+1 ), ldwrku )
*
*                    Generate Q in A
*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, n, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in WORK(IU), copying result to
*                    WORK(IR)
*                    (CWorkspace: need   2*N*N+3*N,
*                                 prefer 2*N*N+2*N+2*N*NB)
*                    (RWorkspace: need   N)
*
                     CALL cgebrd( n, n, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'U', n, n, work( iu ), ldwrku,
     $                            work( ir ), ldwrkr )
*
*                    Generate left bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in WORK(IR)
*                    (CWorkspace: need   2*N*N+3*N-1,
*                                 prefer 2*N*N+2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of R in WORK(IU) and computing
*                    right singular vectors of R in WORK(IR)
*                    (CWorkspace: need 2*N*N)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
     $                            work( ir ), ldwrkr, work( iu ),
     $                            ldwrku, cdum, 1, rwork( irwork ),
     $                            info )
*
*                    Multiply Q in A by left singular vectors of R in
*                    WORK(IU), storing result in U
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
     $                           work( iu ), ldwrku, czero, u, ldu )
*
*                    Copy right singular vectors of R to A
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
     $                            lda )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, n, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Zero out below R in A
*
                     IF( n .GT. 1 ) THEN
                        CALL claset( 'L', n-1, n-1, czero, czero,
     $                               a( 2, 1 ), lda )
                     END IF
*
*                    Bidiagonalize R in A
*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply Q in U by left vectors bidiagonalizing R
*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
     $                            work( itauq ), u, ldu, work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right vectors bidiagonalizing R in A
*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, a, lda,
     $                            work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U and computing right
*                    singular vectors of A in A
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
     $                            lda, u, ldu, cdum, 1, rwork( irwork ),
     $                            info )
*
                  END IF
*
               ELSE IF( wntvas ) THEN
*
*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
*                         or 'A')
*                 N left singular vectors to be computed in U and
*                 N right singular vectors to be computed in VT
*
                  IF( lwork.GE.n*n+3*n ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+lda*n ) THEN
*
*                       WORK(IU) is LDA by N
*
                        ldwrku = lda
                     ELSE
*
*                       WORK(IU) is N by N
*
                        ldwrku = n
                     END IF
                     itau = iu + ldwrku*n
                     iwork = itau + n
*
*                    Compute A=Q*R
*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R to WORK(IU), zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'L', n-1, n-1, czero, czero,
     $                            work( iu+1 ), ldwrku )
*
*                    Generate Q in A
*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, n, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in WORK(IU), copying result to VT
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
     $                            ldvt )
*
*                    Generate left bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in VT
*                    (CWorkspace: need   N*N+3*N-1,
*                                 prefer N*N+2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, vt, ldvt,
     $                            work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of R in WORK(IU) and computing
*                    right singular vectors of R in VT
*                    (CWorkspace: need N*N)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, work( iu ), ldwrku, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply Q in A by left singular vectors of R in
*                    WORK(IU), storing result in U
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, n, cone, a, lda,
     $                           work( iu ), ldwrku, czero, u, ldu )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, n, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R to VT, zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
                     IF( n.GT.1 )
     $                  CALL claset( 'L', n-1, n-1, czero, czero,
     $                               vt( 2, 1 ), ldvt )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in VT
*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply Q in U by left bidiagonalizing vectors
*                    in VT
*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
     $                            work( itauq ), u, ldu, work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in VT
*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, vt, ldvt,
     $                            work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U and computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, u, ldu, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               END IF
*
            ELSE IF( wntua ) THEN
*
               IF( wntvn ) THEN
*
*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
*                 M left singular vectors to be computed in U and
*                 no right singular vectors to be computed
*
                  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     ir = 1
                     IF( lwork.GE.wrkbl+lda*n ) THEN
*
*                       WORK(IR) is LDA by N
*
                        ldwrkr = lda
                     ELSE
*
*                       WORK(IR) is N by N
*
                        ldwrkr = n
                     END IF
                     itau = ir + ldwrkr*n
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Copy R to WORK(IR), zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, work( ir ),
     $                            ldwrkr )
                     CALL claset( 'L', n-1, n-1, czero, czero,
     $                            work( ir+1 ), ldwrkr )
*
*                    Generate Q in U
*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, m, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in WORK(IR)
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, work( ir ), ldwrkr, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in WORK(IR)
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', n, n, n, work( ir ), ldwrkr,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of R in WORK(IR)
*                    (CWorkspace: need N*N)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, 0, n, 0, s, rwork( ie ),
     $                            cdum,
     $                            1, work( ir ), ldwrkr, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply Q in U by left singular vectors of R in
*                    WORK(IR), storing result in A
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
     $                           work( ir ), ldwrkr, czero, a, lda )
*
*                    Copy left singular vectors of A from A to U
*
                     CALL clacpy( 'F', m, n, a, lda, u, ldu )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need N+M, prefer N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, m, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Zero out below R in A
*
                     IF( n .GT. 1 ) THEN
                        CALL claset( 'L', n-1, n-1, czero, czero,
     $                               a( 2, 1 ), lda )
                     END IF
*
*                    Bidiagonalize R in A
*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply Q in U by left bidiagonalizing vectors
*                    in A
*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
     $                            work( itauq ), u, ldu, work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, 0, m, 0, s, rwork( ie ),
     $                            cdum,
     $                            1, u, ldu, cdum, 1, rwork( irwork ),
     $                            info )
*
                  END IF
*
               ELSE IF( wntvo ) THEN
*
*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
*                 M left singular vectors to be computed in U and
*                 N right singular vectors to be overwritten on A
*
                  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+2*lda*n ) THEN
*
*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
*
                        ldwrku = lda
                        ir = iu + ldwrku*n
                        ldwrkr = lda
                     ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
*
*                       WORK(IU) is LDA by N and WORK(IR) is N by N
*
                        ldwrku = lda
                        ir = iu + ldwrku*n
                        ldwrkr = n
                     ELSE
*
*                       WORK(IU) is N by N and WORK(IR) is N by N
*
                        ldwrku = n
                        ir = iu + ldwrku*n
                        ldwrkr = n
                     END IF
                     itau = ir + ldwrkr*n
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, m, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R to WORK(IU), zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'L', n-1, n-1, czero, czero,
     $                            work( iu+1 ), ldwrku )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in WORK(IU), copying result to
*                    WORK(IR)
*                    (CWorkspace: need   2*N*N+3*N,
*                                 prefer 2*N*N+2*N+2*N*NB)
*                    (RWorkspace: need   N)
*
                     CALL cgebrd( n, n, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'U', n, n, work( iu ), ldwrku,
     $                            work( ir ), ldwrkr )
*
*                    Generate left bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in WORK(IR)
*                    (CWorkspace: need   2*N*N+3*N-1,
*                                 prefer 2*N*N+2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, work( ir ), ldwrkr,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of R in WORK(IU) and computing
*                    right singular vectors of R in WORK(IR)
*                    (CWorkspace: need 2*N*N)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
     $                            work( ir ), ldwrkr, work( iu ),
     $                            ldwrku, cdum, 1, rwork( irwork ),
     $                            info )
*
*                    Multiply Q in U by left singular vectors of R in
*                    WORK(IU), storing result in A
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
     $                           work( iu ), ldwrku, czero, a, lda )
*
*                    Copy left singular vectors of A from A to U
*
                     CALL clacpy( 'F', m, n, a, lda, u, ldu )
*
*                    Copy right singular vectors of R from WORK(IR) to A
*
                     CALL clacpy( 'F', n, n, work( ir ), ldwrkr, a,
     $                            lda )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need N+M, prefer N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, m, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Zero out below R in A
*
                     IF( n .GT. 1 ) THEN
                        CALL claset( 'L', n-1, n-1, czero, czero,
     $                               a( 2, 1 ), lda )
                     END IF
*
*                    Bidiagonalize R in A
*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply Q in U by left bidiagonalizing vectors
*                    in A
*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
     $                            work( itauq ), u, ldu, work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in A
*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, a, lda,
     $                            work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U and computing right
*                    singular vectors of A in A
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
     $                            lda, u, ldu, cdum, 1, rwork( irwork ),
     $                            info )
*
                  END IF
*
               ELSE IF( wntvas ) THEN
*
*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
*                         or 'A')
*                 M left singular vectors to be computed in U and
*                 N right singular vectors to be computed in VT
*
                  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+lda*n ) THEN
*
*                       WORK(IU) is LDA by N
*
                        ldwrku = lda
                     ELSE
*
*                       WORK(IU) is N by N
*
                        ldwrku = n
                     END IF
                     itau = iu + ldwrku*n
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, m, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R to WORK(IU), zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'L', n-1, n-1, czero, czero,
     $                            work( iu+1 ), ldwrku )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in WORK(IU), copying result to VT
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'U', n, n, work( iu ), ldwrku, vt,
     $                            ldvt )
*
*                    Generate left bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', n, n, n, work( iu ), ldwrku,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in VT
*                    (CWorkspace: need   N*N+3*N-1,
*                                 prefer N*N+2*N+(N-1)*NB)
*                    (RWorkspace: need   0)
*
                     CALL cungbr( 'P', n, n, n, vt, ldvt,
     $                            work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of R in WORK(IU) and computing
*                    right singular vectors of R in VT
*                    (CWorkspace: need N*N)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, work( iu ), ldwrku, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply Q in U by left singular vectors of R in
*                    WORK(IU), storing result in A
*                    (CWorkspace: need N*N)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, n, cone, u, ldu,
     $                           work( iu ), ldwrku, czero, a, lda )
*
*                    Copy left singular vectors of A from A to U
*
                     CALL clacpy( 'F', m, n, a, lda, u, ldu )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + n
*
*                    Compute A=Q*R, copying result to U
*                    (CWorkspace: need 2*N, prefer N+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cgeqrf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, n, a, lda, u, ldu )
*
*                    Generate Q in U
*                    (CWorkspace: need N+M, prefer N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungqr( m, m, n, u, ldu, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy R from A to VT, zeroing out below it
*
                     CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
                     IF( n.GT.1 )
     $                  CALL claset( 'L', n-1, n-1, czero, czero,
     $                               vt( 2, 1 ), ldvt )
                     ie = 1
                     itauq = itau
                     itaup = itauq + n
                     iwork = itaup + n
*
*                    Bidiagonalize R in VT
*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
*                    (RWorkspace: need N)
*
                     CALL cgebrd( n, n, vt, ldvt, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply Q in U by left bidiagonalizing vectors
*                    in VT
*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
     $                            work( itauq ), u, ldu, work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in VT
*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', n, n, n, vt, ldvt,
     $                            work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + n
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U and computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', n, n, m, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, u, ldu, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           M .LT. MNTHR
*
*           Path 10 (M at least N, but not much larger)
*           Reduce to bidiagonal form without QR decomposition
*
            ie = 1
            itauq = 1
            itaup = itauq + n
            iwork = itaup + n
*
*           Bidiagonalize A
*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
*           (RWorkspace: need N)
*
            CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
     $                   work( itaup ), work( iwork ), lwork-iwork+1,
     $                   ierr )
            IF( wntuas ) THEN
*
*              If left singular vectors desired in U, copy result to U
*              and generate left bidiagonalizing vectors in U
*              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
*              (RWorkspace: 0)
*
               CALL clacpy( 'L', m, n, a, lda, u, ldu )
               IF( wntus )
     $            ncu = n
               IF( wntua )
     $            ncu = m
               CALL cungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            IF( wntvas ) THEN
*
*              If right singular vectors desired in VT, copy result to
*              VT and generate right bidiagonalizing vectors in VT
*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*              (RWorkspace: 0)
*
               CALL clacpy( 'U', n, n, a, lda, vt, ldvt )
               CALL cungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            IF( wntuo ) THEN
*
*              If left singular vectors desired in A, generate left
*              bidiagonalizing vectors in A
*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
*              (RWorkspace: 0)
*
               CALL cungbr( 'Q', m, n, n, a, lda, work( itauq ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            IF( wntvo ) THEN
*
*              If right singular vectors desired in A, generate right
*              bidiagonalizing vectors in A
*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
*              (RWorkspace: 0)
*
               CALL cungbr( 'P', n, n, n, a, lda, work( itaup ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            irwork = ie + n
            IF( wntuas .OR. wntuo )
     $         nru = m
            IF( wntun )
     $         nru = 0
            IF( wntvas .OR. wntvo )
     $         ncvt = n
            IF( wntvn )
     $         ncvt = 0
            IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
*
*              Perform bidiagonal QR iteration, if desired, computing
*              left singular vectors in U and computing right singular
*              vectors in VT
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
     $                      ldvt, u, ldu, cdum, 1, rwork( irwork ),
     $                      info )
            ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
*
*              Perform bidiagonal QR iteration, if desired, computing
*              left singular vectors in U and computing right singular
*              vectors in A
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
     $                      lda, u, ldu, cdum, 1, rwork( irwork ),
     $                      info )
            ELSE
*
*              Perform bidiagonal QR iteration, if desired, computing
*              left singular vectors in A and computing right singular
*              vectors in VT
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
     $                      ldvt, a, lda, cdum, 1, rwork( irwork ),
     $                      info )
            END IF
*
         END IF
*
      ELSE
*
*        A has more columns than rows. If A has sufficiently more
*        columns than rows, first reduce using the LQ decomposition (if
*        sufficient workspace available)
*
         IF( n.GE.mnthr ) THEN
*
            IF( wntvn ) THEN
*
*              Path 1t(N much larger than M, JOBVT='N')
*              No right singular vectors to be computed
*
               itau = 1
               iwork = itau + m
*
*              Compute A=L*Q
*              (CWorkspace: need 2*M, prefer M+M*NB)
*              (RWorkspace: 0)
*
               CALL cgelqf( m, n, a, lda, work( itau ),
     $                      work( iwork ),
     $                      lwork-iwork+1, ierr )
*
*              Zero out above L
*
               CALL claset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
     $                      lda )
               ie = 1
               itauq = 1
               itaup = itauq + m
               iwork = itaup + m
*
*              Bidiagonalize L in A
*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*              (RWorkspace: need M)
*
               CALL cgebrd( m, m, a, lda, s, rwork( ie ),
     $                      work( itauq ),
     $                      work( itaup ), work( iwork ), lwork-iwork+1,
     $                      ierr )
               IF( wntuo .OR. wntuas ) THEN
*
*                 If left singular vectors desired, generate Q
*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'Q', m, m, m, a, lda, work( itauq ),
     $                         work( iwork ), lwork-iwork+1, ierr )
               END IF
               irwork = ie + m
               nru = 0
               IF( wntuo .OR. wntuas )
     $            nru = m
*
*              Perform bidiagonal QR iteration, computing left singular
*              vectors of A in A if desired
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum,
     $                      1,
     $                      a, lda, cdum, 1, rwork( irwork ), info )
*
*              If left singular vectors desired in U, copy them there
*
               IF( wntuas )
     $            CALL clacpy( 'F', m, m, a, lda, u, ldu )
*
            ELSE IF( wntvo .AND. wntun ) THEN
*
*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
*              M right singular vectors to be overwritten on A and
*              no left singular vectors to be computed
*
               IF( lwork.GE.m*m+3*m ) THEN
*
*                 Sufficient workspace for a fast algorithm
*
                  ir = 1
                  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
*
*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
*
                     ldwrku = lda
                     chunk = n
                     ldwrkr = lda
                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
*
*                    WORK(IU) is LDA by N and WORK(IR) is M by M
*
                     ldwrku = lda
                     chunk = n
                     ldwrkr = m
                  ELSE
*
*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
*
                     ldwrku = m
                     chunk = ( lwork-m*m ) / m
                     ldwrkr = m
                  END IF
                  itau = ir + ldwrkr*m
                  iwork = itau + m
*
*                 Compute A=L*Q
*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cgelqf( m, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Copy L to WORK(IR) and zero out above it
*
                  CALL clacpy( 'L', m, m, a, lda, work( ir ),
     $                         ldwrkr )
                  CALL claset( 'U', m-1, m-1, czero, czero,
     $                         work( ir+ldwrkr ), ldwrkr )
*
*                 Generate Q in A
*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cunglq( m, n, m, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ie = 1
                  itauq = itau
                  itaup = itauq + m
                  iwork = itaup + m
*
*                 Bidiagonalize L in WORK(IR)
*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
*                 (RWorkspace: need M)
*
                  CALL cgebrd( m, m, work( ir ), ldwrkr, s,
     $                         rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Generate right vectors bidiagonalizing L
*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
     $                         work( itaup ), work( iwork ),
     $                         lwork-iwork+1, ierr )
                  irwork = ie + m
*
*                 Perform bidiagonal QR iteration, computing right
*                 singular vectors of L in WORK(IR)
*                 (CWorkspace: need M*M)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
     $                         work( ir ), ldwrkr, cdum, 1, cdum, 1,
     $                         rwork( irwork ), info )
                  iu = itauq
*
*                 Multiply right singular vectors of L in WORK(IR) by Q
*                 in A, storing result in WORK(IU) and copying to A
*                 (CWorkspace: need M*M+M, prefer M*M+M*N)
*                 (RWorkspace: 0)
*
                  DO 30 i = 1, n, chunk
                     blk = min( n-i+1, chunk )
                     CALL cgemm( 'N', 'N', m, blk, m, cone,
     $                           work( ir ),
     $                           ldwrkr, a( 1, i ), lda, czero,
     $                           work( iu ), ldwrku )
                     CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
     $                            a( 1, i ), lda )
   30             CONTINUE
*
               ELSE
*
*                 Insufficient workspace for a fast algorithm
*
                  ie = 1
                  itauq = 1
                  itaup = itauq + m
                  iwork = itaup + m
*
*                 Bidiagonalize A
*                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
*                 (RWorkspace: need M)
*
                  CALL cgebrd( m, n, a, lda, s, rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Generate right vectors bidiagonalizing A
*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  irwork = ie + m
*
*                 Perform bidiagonal QR iteration, computing right
*                 singular vectors of A in A
*                 (CWorkspace: 0)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a,
     $                         lda,
     $                         cdum, 1, cdum, 1, rwork( irwork ), info )
*
               END IF
*
            ELSE IF( wntvo .AND. wntuas ) THEN
*
*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
*              M right singular vectors to be overwritten on A and
*              M left singular vectors to be computed in U
*
               IF( lwork.GE.m*m+3*m ) THEN
*
*                 Sufficient workspace for a fast algorithm
*
                  ir = 1
                  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
*
*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
*
                     ldwrku = lda
                     chunk = n
                     ldwrkr = lda
                  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
*
*                    WORK(IU) is LDA by N and WORK(IR) is M by M
*
                     ldwrku = lda
                     chunk = n
                     ldwrkr = m
                  ELSE
*
*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
*
                     ldwrku = m
                     chunk = ( lwork-m*m ) / m
                     ldwrkr = m
                  END IF
                  itau = ir + ldwrkr*m
                  iwork = itau + m
*
*                 Compute A=L*Q
*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cgelqf( m, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Copy L to U, zeroing out above it
*
                  CALL clacpy( 'L', m, m, a, lda, u, ldu )
                  CALL claset( 'U', m-1, m-1, czero, czero, u( 1,
     $                         2 ),
     $                         ldu )
*
*                 Generate Q in A
*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cunglq( m, n, m, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ie = 1
                  itauq = itau
                  itaup = itauq + m
                  iwork = itaup + m
*
*                 Bidiagonalize L in U, copying result to WORK(IR)
*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
*                 (RWorkspace: need M)
*
                  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  CALL clacpy( 'U', m, m, u, ldu, work( ir ),
     $                         ldwrkr )
*
*                 Generate right vectors bidiagonalizing L in WORK(IR)
*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
     $                         work( itaup ), work( iwork ),
     $                         lwork-iwork+1, ierr )
*
*                 Generate left vectors bidiagonalizing L in U
*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  irwork = ie + m
*
*                 Perform bidiagonal QR iteration, computing left
*                 singular vectors of L in U, and computing right
*                 singular vectors of L in WORK(IR)
*                 (CWorkspace: need M*M)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
     $                         work( ir ), ldwrkr, u, ldu, cdum, 1,
     $                         rwork( irwork ), info )
                  iu = itauq
*
*                 Multiply right singular vectors of L in WORK(IR) by Q
*                 in A, storing result in WORK(IU) and copying to A
*                 (CWorkspace: need M*M+M, prefer M*M+M*N)
*                 (RWorkspace: 0)
*
                  DO 40 i = 1, n, chunk
                     blk = min( n-i+1, chunk )
                     CALL cgemm( 'N', 'N', m, blk, m, cone,
     $                           work( ir ),
     $                           ldwrkr, a( 1, i ), lda, czero,
     $                           work( iu ), ldwrku )
                     CALL clacpy( 'F', m, blk, work( iu ), ldwrku,
     $                            a( 1, i ), lda )
   40             CONTINUE
*
               ELSE
*
*                 Insufficient workspace for a fast algorithm
*
                  itau = 1
                  iwork = itau + m
*
*                 Compute A=L*Q
*                 (CWorkspace: need 2*M, prefer M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cgelqf( m, n, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Copy L to U, zeroing out above it
*
                  CALL clacpy( 'L', m, m, a, lda, u, ldu )
                  CALL claset( 'U', m-1, m-1, czero, czero, u( 1,
     $                         2 ),
     $                         ldu )
*
*                 Generate Q in A
*                 (CWorkspace: need 2*M, prefer M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cunglq( m, n, m, a, lda, work( itau ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  ie = 1
                  itauq = itau
                  itaup = itauq + m
                  iwork = itaup + m
*
*                 Bidiagonalize L in U
*                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                 (RWorkspace: need M)
*
                  CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
     $                         work( itauq ), work( itaup ),
     $                         work( iwork ), lwork-iwork+1, ierr )
*
*                 Multiply right vectors bidiagonalizing L by Q in A
*                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                 (RWorkspace: 0)
*
                  CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
     $                         work( itaup ), a, lda, work( iwork ),
     $                         lwork-iwork+1, ierr )
*
*                 Generate left vectors bidiagonalizing L in U
*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                 (RWorkspace: 0)
*
                  CALL cungbr( 'Q', m, m, m, u, ldu, work( itauq ),
     $                         work( iwork ), lwork-iwork+1, ierr )
                  irwork = ie + m
*
*                 Perform bidiagonal QR iteration, computing left
*                 singular vectors of A in U and computing right
*                 singular vectors of A in A
*                 (CWorkspace: 0)
*                 (RWorkspace: need BDSPAC)
*
                  CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a,
     $                         lda,
     $                         u, ldu, cdum, 1, rwork( irwork ), info )
*
               END IF
*
            ELSE IF( wntvs ) THEN
*
               IF( wntun ) THEN
*
*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
*                 M right singular vectors to be computed in VT and
*                 no left singular vectors to be computed
*
                  IF( lwork.GE.m*m+3*m ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     ir = 1
                     IF( lwork.GE.wrkbl+lda*m ) THEN
*
*                       WORK(IR) is LDA by M
*
                        ldwrkr = lda
                     ELSE
*
*                       WORK(IR) is M by M
*
                        ldwrkr = m
                     END IF
                     itau = ir + ldwrkr*m
                     iwork = itau + m
*
*                    Compute A=L*Q
*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to WORK(IR), zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, work( ir ),
     $                            ldwrkr )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            work( ir+ldwrkr ), ldwrkr )
*
*                    Generate Q in A
*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( m, n, m, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in WORK(IR)
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, work( ir ), ldwrkr, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right vectors bidiagonalizing L in
*                    WORK(IR)
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing right
*                    singular vectors of L in WORK(IR)
*                    (CWorkspace: need M*M)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
     $                            work( ir ), ldwrkr, cdum, 1, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply right singular vectors of L in WORK(IR) by
*                    Q in A, storing result in VT
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
     $                           ldwrkr, a, lda, czero, vt, ldvt )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + m
*
*                    Compute A=L*Q
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy result to VT
*
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( m, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Zero out above L in A
*
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            a( 1, 2 ), lda )
*
*                    Bidiagonalize L in A
*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply right vectors bidiagonalizing L by Q in VT
*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
     $                            work( itaup ), vt, ldvt,
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, cdum, 1, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               ELSE IF( wntuo ) THEN
*
*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
*                 M right singular vectors to be computed in VT and
*                 M left singular vectors to be overwritten on A
*
                  IF( lwork.GE.2*m*m+3*m ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+2*lda*m ) THEN
*
*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
*
                        ldwrku = lda
                        ir = iu + ldwrku*m
                        ldwrkr = lda
                     ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
*
*                       WORK(IU) is LDA by M and WORK(IR) is M by M
*
                        ldwrku = lda
                        ir = iu + ldwrku*m
                        ldwrkr = m
                     ELSE
*
*                       WORK(IU) is M by M and WORK(IR) is M by M
*
                        ldwrku = m
                        ir = iu + ldwrku*m
                        ldwrkr = m
                     END IF
                     itau = ir + ldwrkr*m
                     iwork = itau + m
*
*                    Compute A=L*Q
*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to WORK(IU), zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            work( iu+ldwrku ), ldwrku )
*
*                    Generate Q in A
*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( m, n, m, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in WORK(IU), copying result to
*                    WORK(IR)
*                    (CWorkspace: need   2*M*M+3*M,
*                                 prefer 2*M*M+2*M+2*M*NB)
*                    (RWorkspace: need   M)
*
                     CALL cgebrd( m, m, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, m, work( iu ), ldwrku,
     $                            work( ir ), ldwrkr )
*
*                    Generate right bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need   2*M*M+3*M-1,
*                                 prefer 2*M*M+2*M+(M-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in WORK(IR)
*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of L in WORK(IR) and computing
*                    right singular vectors of L in WORK(IU)
*                    (CWorkspace: need 2*M*M)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
     $                            work( iu ), ldwrku, work( ir ),
     $                            ldwrkr, cdum, 1, rwork( irwork ),
     $                            info )
*
*                    Multiply right singular vectors of L in WORK(IU) by
*                    Q in A, storing result in VT
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
     $                           ldwrku, a, lda, czero, vt, ldvt )
*
*                    Copy left singular vectors of L to A
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
     $                            lda )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( m, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Zero out above L in A
*
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            a( 1, 2 ), lda )
*
*                    Bidiagonalize L in A
*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply right vectors bidiagonalizing L by Q in VT
*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
     $                            work( itaup ), vt, ldvt,
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors of L in A
*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, a, lda,
     $                            work( itauq ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in A and computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, a, lda, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               ELSE IF( wntuas ) THEN
*
*                 Path 6t(N much larger than M, JOBU='S' or 'A',
*                         JOBVT='S')
*                 M right singular vectors to be computed in VT and
*                 M left singular vectors to be computed in U
*
                  IF( lwork.GE.m*m+3*m ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+lda*m ) THEN
*
*                       WORK(IU) is LDA by M
*
                        ldwrku = lda
                     ELSE
*
*                       WORK(IU) is M by M
*
                        ldwrku = m
                     END IF
                     itau = iu + ldwrku*m
                     iwork = itau + m
*
*                    Compute A=L*Q
*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to WORK(IU), zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            work( iu+ldwrku ), ldwrku )
*
*                    Generate Q in A
*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( m, n, m, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in WORK(IU), copying result to U
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
     $                            ldu )
*
*                    Generate right bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need   M*M+3*M-1,
*                                 prefer M*M+2*M+(M-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in U
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, u, ldu,
     $                            work( itauq ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of L in U and computing right
*                    singular vectors of L in WORK(IU)
*                    (CWorkspace: need M*M)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
     $                            work( iu ), ldwrku, u, ldu, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply right singular vectors of L in WORK(IU) by
*                    Q in A, storing result in VT
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
     $                           ldwrku, a, lda, czero, vt, ldvt )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( m, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to U, zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, u, ldu )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            u( 1, 2 ), ldu )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in U
*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply right bidiagonalizing vectors in U by Q
*                    in VT
*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
     $                            work( itaup ), vt, ldvt,
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in U
*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, u, ldu,
     $                            work( itauq ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U and computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, u, ldu, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               END IF
*
            ELSE IF( wntva ) THEN
*
               IF( wntun ) THEN
*
*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
*                 N right singular vectors to be computed in VT and
*                 no left singular vectors to be computed
*
                  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     ir = 1
                     IF( lwork.GE.wrkbl+lda*m ) THEN
*
*                       WORK(IR) is LDA by M
*
                        ldwrkr = lda
                     ELSE
*
*                       WORK(IR) is M by M
*
                        ldwrkr = m
                     END IF
                     itau = ir + ldwrkr*m
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Copy L to WORK(IR), zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, work( ir ),
     $                            ldwrkr )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            work( ir+ldwrkr ), ldwrkr )
*
*                    Generate Q in VT
*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in WORK(IR)
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, work( ir ), ldwrkr, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate right bidiagonalizing vectors in WORK(IR)
*                    (CWorkspace: need   M*M+3*M-1,
*                                 prefer M*M+2*M+(M-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', m, m, m, work( ir ), ldwrkr,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing right
*                    singular vectors of L in WORK(IR)
*                    (CWorkspace: need M*M)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
     $                            work( ir ), ldwrkr, cdum, 1, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply right singular vectors of L in WORK(IR) by
*                    Q in VT, storing result in A
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, m, cone, work( ir ),
     $                           ldwrkr, vt, ldvt, czero, a, lda )
*
*                    Copy right singular vectors of A from A to VT
*
                     CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need M+N, prefer M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Zero out above L in A
*
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            a( 1, 2 ), lda )
*
*                    Bidiagonalize L in A
*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply right bidiagonalizing vectors in A by Q
*                    in VT
*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
     $                            work( itaup ), vt, ldvt,
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, n, 0, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, cdum, 1, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               ELSE IF( wntuo ) THEN
*
*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
*                 N right singular vectors to be computed in VT and
*                 M left singular vectors to be overwritten on A
*
                  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+2*lda*m ) THEN
*
*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
*
                        ldwrku = lda
                        ir = iu + ldwrku*m
                        ldwrkr = lda
                     ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
*
*                       WORK(IU) is LDA by M and WORK(IR) is M by M
*
                        ldwrku = lda
                        ir = iu + ldwrku*m
                        ldwrkr = m
                     ELSE
*
*                       WORK(IU) is M by M and WORK(IR) is M by M
*
                        ldwrku = m
                        ir = iu + ldwrku*m
                        ldwrkr = m
                     END IF
                     itau = ir + ldwrkr*m
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to WORK(IU), zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            work( iu+ldwrku ), ldwrku )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in WORK(IU), copying result to
*                    WORK(IR)
*                    (CWorkspace: need   2*M*M+3*M,
*                                 prefer 2*M*M+2*M+2*M*NB)
*                    (RWorkspace: need   M)
*
                     CALL cgebrd( m, m, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, m, work( iu ), ldwrku,
     $                            work( ir ), ldwrkr )
*
*                    Generate right bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need   2*M*M+3*M-1,
*                                 prefer 2*M*M+2*M+(M-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in WORK(IR)
*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, work( ir ), ldwrkr,
     $                            work( itauq ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of L in WORK(IR) and computing
*                    right singular vectors of L in WORK(IU)
*                    (CWorkspace: need 2*M*M)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
     $                            work( iu ), ldwrku, work( ir ),
     $                            ldwrkr, cdum, 1, rwork( irwork ),
     $                            info )
*
*                    Multiply right singular vectors of L in WORK(IU) by
*                    Q in VT, storing result in A
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
     $                           ldwrku, vt, ldvt, czero, a, lda )
*
*                    Copy right singular vectors of A from A to VT
*
                     CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
*
*                    Copy left singular vectors of A from WORK(IR) to A
*
                     CALL clacpy( 'F', m, m, work( ir ), ldwrkr, a,
     $                            lda )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need M+N, prefer M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Zero out above L in A
*
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            a( 1, 2 ), lda )
*
*                    Bidiagonalize L in A
*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, a, lda, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply right bidiagonalizing vectors in A by Q
*                    in VT
*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'P', 'L', 'C', m, n, m, a, lda,
     $                            work( itaup ), vt, ldvt,
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in A
*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, a, lda,
     $                            work( itauq ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in A and computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, a, lda, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               ELSE IF( wntuas ) THEN
*
*                 Path 9t (N much larger than M, JOBU='S' or 'A',
*                          JOBVT='A')
*                 N right singular vectors to be computed in VT and
*                 M left singular vectors to be computed in U
*
                  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
*
*                    Sufficient workspace for a fast algorithm
*
                     iu = 1
                     IF( lwork.GE.wrkbl+lda*m ) THEN
*
*                       WORK(IU) is LDA by M
*
                        ldwrku = lda
                     ELSE
*
*                       WORK(IU) is M by M
*
                        ldwrku = m
                     END IF
                     itau = iu + ldwrku*m
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to WORK(IU), zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, work( iu ),
     $                            ldwrku )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            work( iu+ldwrku ), ldwrku )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in WORK(IU), copying result to U
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, work( iu ), ldwrku, s,
     $                            rwork( ie ), work( itauq ),
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
                     CALL clacpy( 'L', m, m, work( iu ), ldwrku, u,
     $                            ldu )
*
*                    Generate right bidiagonalizing vectors in WORK(IU)
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'P', m, m, m, work( iu ), ldwrku,
     $                            work( itaup ), work( iwork ),
     $                            lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in U
*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, u, ldu,
     $                            work( itauq ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of L in U and computing right
*                    singular vectors of L in WORK(IU)
*                    (CWorkspace: need M*M)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
     $                            work( iu ), ldwrku, u, ldu, cdum, 1,
     $                            rwork( irwork ), info )
*
*                    Multiply right singular vectors of L in WORK(IU) by
*                    Q in VT, storing result in A
*                    (CWorkspace: need M*M)
*                    (RWorkspace: 0)
*
                     CALL cgemm( 'N', 'N', m, n, m, cone, work( iu ),
     $                           ldwrku, vt, ldvt, czero, a, lda )
*
*                    Copy right singular vectors of A from A to VT
*
                     CALL clacpy( 'F', m, n, a, lda, vt, ldvt )
*
                  ELSE
*
*                    Insufficient workspace for a fast algorithm
*
                     itau = 1
                     iwork = itau + m
*
*                    Compute A=L*Q, copying result to VT
*                    (CWorkspace: need 2*M, prefer M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cgelqf( m, n, a, lda, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
*
*                    Generate Q in VT
*                    (CWorkspace: need M+N, prefer M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunglq( n, n, m, vt, ldvt, work( itau ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Copy L to U, zeroing out above it
*
                     CALL clacpy( 'L', m, m, a, lda, u, ldu )
                     CALL claset( 'U', m-1, m-1, czero, czero,
     $                            u( 1, 2 ), ldu )
                     ie = 1
                     itauq = itau
                     itaup = itauq + m
                     iwork = itaup + m
*
*                    Bidiagonalize L in U
*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
*                    (RWorkspace: need M)
*
                     CALL cgebrd( m, m, u, ldu, s, rwork( ie ),
     $                            work( itauq ), work( itaup ),
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Multiply right bidiagonalizing vectors in U by Q
*                    in VT
*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
*                    (RWorkspace: 0)
*
                     CALL cunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
     $                            work( itaup ), vt, ldvt,
     $                            work( iwork ), lwork-iwork+1, ierr )
*
*                    Generate left bidiagonalizing vectors in U
*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
*                    (RWorkspace: 0)
*
                     CALL cungbr( 'Q', m, m, m, u, ldu,
     $                            work( itauq ),
     $                            work( iwork ), lwork-iwork+1, ierr )
                     irwork = ie + m
*
*                    Perform bidiagonal QR iteration, computing left
*                    singular vectors of A in U and computing right
*                    singular vectors of A in VT
*                    (CWorkspace: 0)
*                    (RWorkspace: need BDSPAC)
*
                     CALL cbdsqr( 'U', m, n, m, 0, s, rwork( ie ),
     $                            vt,
     $                            ldvt, u, ldu, cdum, 1,
     $                            rwork( irwork ), info )
*
                  END IF
*
               END IF
*
            END IF
*
         ELSE
*
*           N .LT. MNTHR
*
*           Path 10t (N greater than M, but not much larger)
*           Reduce to bidiagonal form without LQ decomposition
*
            ie = 1
            itauq = 1
            itaup = itauq + m
            iwork = itaup + m
*
*           Bidiagonalize A
*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
*           (RWorkspace: need M)
*
            CALL cgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
     $                   work( itaup ), work( iwork ), lwork-iwork+1,
     $                   ierr )
            IF( wntuas ) THEN
*
*              If left singular vectors desired in U, copy result to U
*              and generate left bidiagonalizing vectors in U
*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
*              (RWorkspace: 0)
*
               CALL clacpy( 'L', m, m, a, lda, u, ldu )
               CALL cungbr( 'Q', m, m, n, u, ldu, work( itauq ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            IF( wntvas ) THEN
*
*              If right singular vectors desired in VT, copy result to
*              VT and generate right bidiagonalizing vectors in VT
*              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
*              (RWorkspace: 0)
*
               CALL clacpy( 'U', m, n, a, lda, vt, ldvt )
               IF( wntva )
     $            nrvt = n
               IF( wntvs )
     $            nrvt = m
               CALL cungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            IF( wntuo ) THEN
*
*              If left singular vectors desired in A, generate left
*              bidiagonalizing vectors in A
*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
*              (RWorkspace: 0)
*
               CALL cungbr( 'Q', m, m, n, a, lda, work( itauq ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            IF( wntvo ) THEN
*
*              If right singular vectors desired in A, generate right
*              bidiagonalizing vectors in A
*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
*              (RWorkspace: 0)
*
               CALL cungbr( 'P', m, n, m, a, lda, work( itaup ),
     $                      work( iwork ), lwork-iwork+1, ierr )
            END IF
            irwork = ie + m
            IF( wntuas .OR. wntuo )
     $         nru = m
            IF( wntun )
     $         nru = 0
            IF( wntvas .OR. wntvo )
     $         ncvt = n
            IF( wntvn )
     $         ncvt = 0
            IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
*
*              Perform bidiagonal QR iteration, if desired, computing
*              left singular vectors in U and computing right singular
*              vectors in VT
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
     $                      ldvt, u, ldu, cdum, 1, rwork( irwork ),
     $                      info )
            ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
*
*              Perform bidiagonal QR iteration, if desired, computing
*              left singular vectors in U and computing right singular
*              vectors in A
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
     $                      lda, u, ldu, cdum, 1, rwork( irwork ),
     $                      info )
            ELSE
*
*              Perform bidiagonal QR iteration, if desired, computing
*              left singular vectors in A and computing right singular
*              vectors in VT
*              (CWorkspace: 0)
*              (RWorkspace: need BDSPAC)
*
               CALL cbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
     $                      ldvt, a, lda, cdum, 1, rwork( irwork ),
     $                      info )
            END IF
*
         END IF
*
      END IF
*
*     Undo scaling if necessary
*
      IF( iscl.EQ.1 ) THEN
         IF( anrm.GT.bignum )
     $      CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
     $                   ierr )
         IF( info.NE.0 .AND. anrm.GT.bignum )
     $      CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
     $                   rwork( ie ), minmn, ierr )
         IF( anrm.LT.smlnum )
     $      CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
     $                   ierr )
         IF( info.NE.0 .AND. anrm.LT.smlnum )
     $      CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
     $                   rwork( ie ), minmn, ierr )
      END IF
*
*     Return optimal workspace in WORK(1)
*
      work( 1 ) = sroundup_lwork(maxwrk)
*
      RETURN
*
*     End of CGESVD
*

Here is the call graph for this function:

Here is the caller graph for this function: