subroutine zgesvd	(	character	JOBU,
		character	JOBVT,
		integer	M,
		integer	N,
		complex*16, dimension( lda, * )	A,
		integer	LDA,
		double precision, dimension( * )	S,
		complex*16, dimension( ldu, * )	U,
		integer	LDU,
		complex*16, dimension( ldvt, * )	VT,
		integer	LDVT,
		complex*16, dimension( * )	WORK,
		integer	LWORK,
		double precision, dimension( * )	RWORK,
		integer	INFO
	)

ZGESVD computes the singular value decomposition (SVD) for GE matrices

Download ZGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:

 ZGESVD computes the singular value decomposition (SVD) of a complex
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors. The SVD is written

      A = U * SIGMA * conjugate-transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
 V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns V**H, not V.

Parameters

[in]	JOBU	JOBU is CHARACTER*1 Specifies options for computing all or part of the matrix U: = 'A': all M columns of U are returned in array U; = 'S': the first min(m,n) columns of U (the left singular vectors) are returned in the array U; = 'O': the first min(m,n) columns of U (the left singular vectors) are overwritten on the array A; = 'N': no columns of U (no left singular vectors) are computed.
[in]	JOBVT	JOBVT is CHARACTER*1 Specifies options for computing all or part of the matrix V**H: = 'A': all N rows of V**H are returned in the array VT; = 'S': the first min(m,n) rows of V**H (the right singular vectors) are returned in the array VT; = 'O': the first min(m,n) rows of V**H (the right singular vectors) are overwritten on the array A; = 'N': no rows of V**H (no right singular vectors) are computed. JOBVT and JOBU cannot both be 'O'.
[in]	M	M is INTEGER The number of rows of the input matrix A. M >= 0.
[in]	N	N is INTEGER The number of columns of the input matrix A. N >= 0.
[in,out]	A	A is COMPLEX*16 array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, if JOBU = 'O', A is overwritten with the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBVT = 'O', A is overwritten with the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A are destroyed.
[in]	LDA	LDA is INTEGER The leading dimension of the array A. LDA >= max(1,M).
[out]	S	S is DOUBLE PRECISION array, dimension (min(M,N)) The singular values of A, sorted so that S(i) >= S(i+1).
[out]	U	U is COMPLEX*16 array, dimension (LDU,UCOL), where UCOL = M if JOBU = 'A' or UCOL = min(M,N) if JOBU = 'S'. If JOBU = 'A', U contains the M-by-M unitary matrix U; if JOBU = 'S', U contains the first min(m,n) columns of U (the left singular vectors, stored columnwise); if JOBU = 'N' or 'O', U is not referenced.
[in]	LDU	LDU is INTEGER The leading dimension of the array U. LDU >= 1; if JOBU = 'S' or 'A', LDU >= M.
[out]	VT	VT is COMPLEX*16 array, dimension (LDVT,N) If JOBVT = 'A', VT contains the N-by-N unitary matrix V**H; if JOBVT = 'S', VT contains the first min(m,n) rows of V**H (the right singular vectors, stored rowwise); if JOBVT = 'N' or 'O', VT is not referenced.
[in]	LDVT	LDVT is INTEGER The leading dimension of the array VT. LDVT >= 1; if JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[out]	WORK	WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]	LWORK	LWORK is INTEGER The dimension of the array WORK. LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)). For good performance, LWORK should generally be larger. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA.
[out]	RWORK	RWORK is DOUBLE PRECISION array, dimension (5*min(M,N)) On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the unconverged superdiagonal elements of an upper bidiagonal matrix B whose diagonal is in S (not necessarily sorted). B satisfies A = U * B * VT, so it has the same singular values as A, and singular vectors related by U and VT.
[out]	INFO	INFO is INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if ZBDSQR did not converge, INFO specifies how many superdiagonals of an intermediate bidiagonal form B did not converge to zero. See the description of RWORK above for details.

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Date: April 2012

Definition at line 216 of file zgesvd.f.

 *
 *  -- LAPACK driver routine (version 3.6.1) --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *     April 2012
 *
 *     .. Scalar Arguments ..
       CHARACTER          jobu, jobvt
       INTEGER            info, lda, ldu, ldvt, lwork, m, n
 *     ..
 *     .. Array Arguments ..
       DOUBLE PRECISION   rwork( * ), s( * )
       COMPLEX*16         a( lda, * ), u( ldu, * ), vt( ldvt, * ),
      $                   work( * )
 *     ..
 *
 *  =====================================================================
 *
 *     .. Parameters ..
       COMPLEX*16         czero, cone
       parameter                ( czero = ( 0.0d0, 0.0d0 ),
      $                   cone = ( 1.0d0, 0.0d0 ) )
       DOUBLE PRECISION   zero, one
       parameter                ( zero = 0.0d0, one = 1.0d0 )
 *     ..
 *     .. Local Scalars ..
       LOGICAL            lquery, wntua, wntuas, wntun, wntuo, wntus,
      $                   wntva, wntvas, wntvn, wntvo, wntvs
       INTEGER            blk, chunk, i, ie, ierr, ir, irwork, iscl,
      $                   itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
      $                   maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
      $                   nrvt, wrkbl
       INTEGER            lwork_zgeqrf, lwork_zungqr_n, lwork_zungqr_m,
      $                   lwork_zgebrd, lwork_zungbr_p, lwork_zungbr_q,
      $                   lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m
       DOUBLE PRECISION   anrm, bignum, eps, smlnum
 *     ..
 *     .. Local Arrays ..
       DOUBLE PRECISION   dum( 1 )
       COMPLEX*16         cdum( 1 )
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           dlascl, xerbla, zbdsqr, zgebrd, zgelqf, zgemm,
      $                   zgeqrf, zlacpy, zlascl, zlaset, zungbr, zunglq,
      $                   zungqr, zunmbr
 *     ..
 *     .. External Functions ..
       LOGICAL            lsame
       INTEGER            ilaenv
       DOUBLE PRECISION   dlamch, zlange
       EXTERNAL           lsame, ilaenv, dlamch, zlange
 *     ..
 *     .. Intrinsic Functions ..
       INTRINSIC          max, min, sqrt
 *     ..
 *     .. Executable Statements ..
 *
 *     Test the input arguments
 *
       info = 0
       minmn = min( m, n )
       wntua = lsame( jobu, 'A' )
       wntus = lsame( jobu, 'S' )
       wntuas = wntua .OR. wntus
       wntuo = lsame( jobu, 'O' )
       wntun = lsame( jobu, 'N' )
       wntva = lsame( jobvt, 'A' )
       wntvs = lsame( jobvt, 'S' )
       wntvas = wntva .OR. wntvs
       wntvo = lsame( jobvt, 'O' )
       wntvn = lsame( jobvt, 'N' )
       lquery = ( lwork.EQ.-1 )
 *
       IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
          info = -1
       ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
      $         ( wntvo .AND. wntuo ) ) THEN
          info = -2
       ELSE IF( m.LT.0 ) THEN
          info = -3
       ELSE IF( n.LT.0 ) THEN
          info = -4
       ELSE IF( lda.LT.max( 1, m ) ) THEN
          info = -6
       ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
          info = -9
       ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
      $         ( wntvs .AND. ldvt.LT.minmn ) ) THEN
          info = -11
       END IF
 *
 *     Compute workspace
 *      (Note: Comments in the code beginning "Workspace:" describe the
 *       minimal amount of workspace needed at that point in the code,
 *       as well as the preferred amount for good performance.
 *       CWorkspace refers to complex workspace, and RWorkspace to
 *       real workspace. NB refers to the optimal block size for the
 *       immediately following subroutine, as returned by ILAENV.)
 *
       IF( info.EQ.0 ) THEN
          minwrk = 1
          maxwrk = 1
          IF( m.GE.n .AND. minmn.GT.0 ) THEN
 *
 *           Space needed for ZBDSQR is BDSPAC = 5*N
 *
             mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
 *           Compute space needed for ZGEQRF
             CALL zgeqrf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
             lwork_zgeqrf = int( cdum(1) )
 *           Compute space needed for ZUNGQR
             CALL zungqr( m, n, n, a, lda, cdum(1), cdum(1), -1, ierr )
             lwork_zungqr_n = int( cdum(1) )
             CALL zungqr( m, m, n, a, lda, cdum(1), cdum(1), -1, ierr )
             lwork_zungqr_m = int( cdum(1) )
 *           Compute space needed for ZGEBRD
             CALL zgebrd( n, n, a, lda, s, dum(1), cdum(1),
      $                   cdum(1), cdum(1), -1, ierr )
             lwork_zgebrd = int( cdum(1) )
 *           Compute space needed for ZUNGBR
             CALL zungbr( 'P', n, n, n, a, lda, cdum(1),
      $                   cdum(1), -1, ierr )
             lwork_zungbr_p = int( cdum(1) )
             CALL zungbr( 'Q', n, n, n, a, lda, cdum(1),
      $                   cdum(1), -1, ierr )
             lwork_zungbr_q = int( cdum(1) )
 *
             IF( m.GE.mnthr ) THEN
                IF( wntun ) THEN
 *
 *                 Path 1 (M much larger than N, JOBU='N')
 *
                   maxwrk = n + lwork_zgeqrf
                   maxwrk = max( maxwrk, 2*n+lwork_zgebrd )
                   IF( wntvo .OR. wntvas )
      $               maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
                   minwrk = 3*n
                ELSE IF( wntuo .AND. wntvn ) THEN
 *
 *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_n )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   maxwrk = max( n*n+wrkbl, n*n+m*n )
                   minwrk = 2*n + m
                ELSE IF( wntuo .AND. wntvas ) THEN
 *
 *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
 *                 'A')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_n )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
                   maxwrk = max( n*n+wrkbl, n*n+m*n )
                   minwrk = 2*n + m
                ELSE IF( wntus .AND. wntvn ) THEN
 *
 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_n )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   maxwrk = n*n + wrkbl
                   minwrk = 2*n + m
                ELSE IF( wntus .AND. wntvo ) THEN
 *
 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_n )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
                   maxwrk = 2*n*n + wrkbl
                   minwrk = 2*n + m
                ELSE IF( wntus .AND. wntvas ) THEN
 *
 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
 *                 'A')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_n )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
                   maxwrk = n*n + wrkbl
                   minwrk = 2*n + m
                ELSE IF( wntua .AND. wntvn ) THEN
 *
 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_m )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   maxwrk = n*n + wrkbl
                   minwrk = 2*n + m
                ELSE IF( wntua .AND. wntvo ) THEN
 *
 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_m )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
                   maxwrk = 2*n*n + wrkbl
                   minwrk = 2*n + m
                ELSE IF( wntua .AND. wntvas ) THEN
 *
 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
 *                 'A')
 *
                   wrkbl = n + lwork_zgeqrf
                   wrkbl = max( wrkbl, n+lwork_zungqr_m )
                   wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
                   wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
                   maxwrk = n*n + wrkbl
                   minwrk = 2*n + m
                END IF
             ELSE
 *
 *              Path 10 (M at least N, but not much larger)
 *
                CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
      $                   cdum(1), cdum(1), -1, ierr )
                lwork_zgebrd = int( cdum(1) )
                maxwrk = 2*n + lwork_zgebrd
                IF( wntus .OR. wntuo ) THEN
                   CALL zungbr( 'Q', m, n, n, a, lda, cdum(1),
      $                   cdum(1), -1, ierr )
                   lwork_zungbr_q = int( cdum(1) )
                   maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
                END IF
                IF( wntua ) THEN
                   CALL zungbr( 'Q', m, m, n, a, lda, cdum(1),
      $                   cdum(1), -1, ierr )
                   lwork_zungbr_q = int( cdum(1) )
                   maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
                END IF
                IF( .NOT.wntvn ) THEN
                   maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
                END IF
                minwrk = 2*n + m
             END IF
          ELSE IF( minmn.GT.0 ) THEN
 *
 *           Space needed for ZBDSQR is BDSPAC = 5*M
 *
             mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
 *           Compute space needed for ZGELQF
             CALL zgelqf( m, n, a, lda, cdum(1), cdum(1), -1, ierr )
             lwork_zgelqf = int( cdum(1) )
 *           Compute space needed for ZUNGLQ
             CALL zunglq( n, n, m, cdum(1), n, cdum(1), cdum(1), -1,
      $                   ierr )
             lwork_zunglq_n = int( cdum(1) )
             CALL zunglq( m, n, m, a, lda, cdum(1), cdum(1), -1, ierr )
             lwork_zunglq_m = int( cdum(1) )
 *           Compute space needed for ZGEBRD
             CALL zgebrd( m, m, a, lda, s, dum(1), cdum(1),
      $                   cdum(1), cdum(1), -1, ierr )
             lwork_zgebrd = int( cdum(1) )
 *            Compute space needed for ZUNGBR P
             CALL zungbr( 'P', m, m, m, a, n, cdum(1),
      $                   cdum(1), -1, ierr )
             lwork_zungbr_p = int( cdum(1) )
 *           Compute space needed for ZUNGBR Q
             CALL zungbr( 'Q', m, m, m, a, n, cdum(1),
      $                   cdum(1), -1, ierr )
             lwork_zungbr_q = int( cdum(1) )
             IF( n.GE.mnthr ) THEN
                IF( wntvn ) THEN
 *
 *                 Path 1t(N much larger than M, JOBVT='N')
 *
                   maxwrk = m + lwork_zgelqf
                   maxwrk = max( maxwrk, 2*m+lwork_zgebrd )
                   IF( wntuo .OR. wntuas )
      $               maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
                   minwrk = 3*m
                ELSE IF( wntvo .AND. wntun ) THEN
 *
 *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_m )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   maxwrk = max( m*m+wrkbl, m*m+m*n )
                   minwrk = 2*m + n
                ELSE IF( wntvo .AND. wntuas ) THEN
 *
 *                 Path 3t(N much larger than M, JOBU='S' or 'A',
 *                 JOBVT='O')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_m )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
                   maxwrk = max( m*m+wrkbl, m*m+m*n )
                   minwrk = 2*m + n
                ELSE IF( wntvs .AND. wntun ) THEN
 *
 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_m )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   maxwrk = m*m + wrkbl
                   minwrk = 2*m + n
                ELSE IF( wntvs .AND. wntuo ) THEN
 *
 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_m )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
                   maxwrk = 2*m*m + wrkbl
                   minwrk = 2*m + n
                ELSE IF( wntvs .AND. wntuas ) THEN
 *
 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
 *                 JOBVT='S')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_m )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
                   maxwrk = m*m + wrkbl
                   minwrk = 2*m + n
                ELSE IF( wntva .AND. wntun ) THEN
 *
 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_n )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   maxwrk = m*m + wrkbl
                   minwrk = 2*m + n
                ELSE IF( wntva .AND. wntuo ) THEN
 *
 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_n )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
                   maxwrk = 2*m*m + wrkbl
                   minwrk = 2*m + n
                ELSE IF( wntva .AND. wntuas ) THEN
 *
 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
 *                 JOBVT='A')
 *
                   wrkbl = m + lwork_zgelqf
                   wrkbl = max( wrkbl, m+lwork_zunglq_n )
                   wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
                   wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
                   maxwrk = m*m + wrkbl
                   minwrk = 2*m + n
                END IF
             ELSE
 *
 *              Path 10t(N greater than M, but not much larger)
 *
                CALL zgebrd( m, n, a, lda, s, dum(1), cdum(1),
      $                   cdum(1), cdum(1), -1, ierr )
                lwork_zgebrd = int( cdum(1) )
                maxwrk = 2*m + lwork_zgebrd
                IF( wntvs .OR. wntvo ) THEN
 *                Compute space needed for ZUNGBR P
                  CALL zungbr( 'P', m, n, m, a, n, cdum(1),
      $                   cdum(1), -1, ierr )
                  lwork_zungbr_p = int( cdum(1) )
                  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
                END IF
                IF( wntva ) THEN
                  CALL zungbr( 'P', n,  n, m, a, n, cdum(1),
      $                   cdum(1), -1, ierr )
                  lwork_zungbr_p = int( cdum(1) )
                  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
                END IF
                IF( .NOT.wntun ) THEN
                   maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
                END IF
                minwrk = 2*m + n
             END IF
          END IF
          maxwrk = max( maxwrk, minwrk )
          work( 1 ) = maxwrk
 *
          IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
             info = -13
          END IF
       END IF
 *
       IF( info.NE.0 ) THEN
          CALL xerbla( 'ZGESVD', -info )
          RETURN
       ELSE IF( lquery ) THEN
          RETURN
       END IF
 *
 *     Quick return if possible
 *
       IF( m.EQ.0 .OR. n.EQ.0 ) THEN
          RETURN
       END IF
 *
 *     Get machine constants
 *
       eps = dlamch( 'P' )
       smlnum = sqrt( dlamch( 'S' ) ) / eps
       bignum = one / smlnum
 *
 *     Scale A if max element outside range [SMLNUM,BIGNUM]
 *
       anrm = zlange( 'M', m, n, a, lda, dum )
       iscl = 0
       IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
          iscl = 1
          CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
       ELSE IF( anrm.GT.bignum ) THEN
          iscl = 1
          CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
       END IF
 *
       IF( m.GE.n ) THEN
 *
 *        A has at least as many rows as columns. If A has sufficiently
 *        more rows than columns, first reduce using the QR
 *        decomposition (if sufficient workspace available)
 *
          IF( m.GE.mnthr ) THEN
 *
             IF( wntun ) THEN
 *
 *              Path 1 (M much larger than N, JOBU='N')
 *              No left singular vectors to be computed
 *
                itau = 1
                iwork = itau + n
 *
 *              Compute A=Q*R
 *              (CWorkspace: need 2*N, prefer N+N*NB)
 *              (RWorkspace: need 0)
 *
                CALL zgeqrf( m, n, a, lda, work( itau ), work( iwork ),
      $                      lwork-iwork+1, ierr )
 *
 *              Zero out below R
 *
                IF( n .GT. 1 ) THEN
                   CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
      $                         lda )
                END IF
                ie = 1
                itauq = 1
                itaup = itauq + n
                iwork = itaup + n
 *
 *              Bidiagonalize R in A
 *              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *              (RWorkspace: need N)
 *
                CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
      $                      work( itaup ), work( iwork ), lwork-iwork+1,
      $                      ierr )
                ncvt = 0
                IF( wntvo .OR. wntvas ) THEN
 *
 *                 If right singular vectors desired, generate P'.
 *                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ncvt = n
                END IF
                irwork = ie + n
 *
 *              Perform bidiagonal QR iteration, computing right
 *              singular vectors of A in A if desired
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
      $                      cdum, 1, cdum, 1, rwork( irwork ), info )
 *
 *              If right singular vectors desired in VT, copy them there
 *
                IF( wntvas )
      $            CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
 *
             ELSE IF( wntuo .AND. wntvn ) THEN
 *
 *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
 *              N left singular vectors to be overwritten on A and
 *              no right singular vectors to be computed
 *
                IF( lwork.GE.n*n+3*n ) THEN
 *
 *                 Sufficient workspace for a fast algorithm
 *
                   ir = 1
                   IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
 *
 *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
 *
                      ldwrku = lda
                      ldwrkr = lda
                   ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
 *
 *                    WORK(IU) is LDA by N, WORK(IR) is N by N
 *
                      ldwrku = lda
                      ldwrkr = n
                   ELSE
 *
 *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
 *
                      ldwrku = ( lwork-n*n ) / n
                      ldwrkr = n
                   END IF
                   itau = ir + ldwrkr*n
                   iwork = itau + n
 *
 *                 Compute A=Q*R
 *                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zgeqrf( m, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Copy R to WORK(IR) and zero out below it
 *
                   CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
                   CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                         work( ir+1 ), ldwrkr )
 *
 *                 Generate Q in A
 *                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungqr( m, n, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ie = 1
                   itauq = itau
                   itaup = itauq + n
                   iwork = itaup + n
 *
 *                 Bidiagonalize R in WORK(IR)
 *                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
 *                 (RWorkspace: need N)
 *
                   CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Generate left vectors bidiagonalizing R
 *                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
 *                 (RWorkspace: need 0)
 *
                   CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
      $                         work( itauq ), work( iwork ),
      $                         lwork-iwork+1, ierr )
                   irwork = ie + n
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of R in WORK(IR)
 *                 (CWorkspace: need N*N)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
      $                         work( ir ), ldwrkr, cdum, 1,
      $                         rwork( irwork ), info )
                   iu = itauq
 *
 *                 Multiply Q in A by left singular vectors of R in
 *                 WORK(IR), storing result in WORK(IU) and copying to A
 *                 (CWorkspace: need N*N+N, prefer N*N+M*N)
 *                 (RWorkspace: 0)
 *
                   DO 10 i = 1, m, ldwrku
                      chunk = min( m-i+1, ldwrku )
                      CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
      $                           lda, work( ir ), ldwrkr, czero,
      $                           work( iu ), ldwrku )
                      CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
      $                            a( i, 1 ), lda )
    10             CONTINUE
 *
                ELSE
 *
 *                 Insufficient workspace for a fast algorithm
 *
                   ie = 1
                   itauq = 1
                   itaup = itauq + n
                   iwork = itaup + n
 *
 *                 Bidiagonalize A
 *                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
 *                 (RWorkspace: N)
 *
                   CALL zgebrd( m, n, a, lda, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Generate left vectors bidiagonalizing A
 *                 (CWorkspace: need 3*N, prefer 2*N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   irwork = ie + n
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of A in A
 *                 (CWorkspace: need 0)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
      $                         a, lda, cdum, 1, rwork( irwork ), info )
 *
                END IF
 *
             ELSE IF( wntuo .AND. wntvas ) THEN
 *
 *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
 *              N left singular vectors to be overwritten on A and
 *              N right singular vectors to be computed in VT
 *
                IF( lwork.GE.n*n+3*n ) THEN
 *
 *                 Sufficient workspace for a fast algorithm
 *
                   ir = 1
                   IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
 *
                      ldwrku = lda
                      ldwrkr = lda
                   ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is N by N
 *
                      ldwrku = lda
                      ldwrkr = n
                   ELSE
 *
 *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
 *
                      ldwrku = ( lwork-n*n ) / n
                      ldwrkr = n
                   END IF
                   itau = ir + ldwrkr*n
                   iwork = itau + n
 *
 *                 Compute A=Q*R
 *                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zgeqrf( m, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Copy R to VT, zeroing out below it
 *
                   CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
                   IF( n.GT.1 )
      $               CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            vt( 2, 1 ), ldvt )
 *
 *                 Generate Q in A
 *                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungqr( m, n, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ie = 1
                   itauq = itau
                   itaup = itauq + n
                   iwork = itaup + n
 *
 *                 Bidiagonalize R in VT, copying result to WORK(IR)
 *                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
 *                 (RWorkspace: need N)
 *
                   CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   CALL zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
 *
 *                 Generate left vectors bidiagonalizing R in WORK(IR)
 *                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
      $                         work( itauq ), work( iwork ),
      $                         lwork-iwork+1, ierr )
 *
 *                 Generate right vectors bidiagonalizing R in VT
 *                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   irwork = ie + n
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of R in WORK(IR) and computing right
 *                 singular vectors of R in VT
 *                 (CWorkspace: need N*N)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
      $                         ldvt, work( ir ), ldwrkr, cdum, 1,
      $                         rwork( irwork ), info )
                   iu = itauq
 *
 *                 Multiply Q in A by left singular vectors of R in
 *                 WORK(IR), storing result in WORK(IU) and copying to A
 *                 (CWorkspace: need N*N+N, prefer N*N+M*N)
 *                 (RWorkspace: 0)
 *
                   DO 20 i = 1, m, ldwrku
                      chunk = min( m-i+1, ldwrku )
                      CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
      $                           lda, work( ir ), ldwrkr, czero,
      $                           work( iu ), ldwrku )
                      CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
      $                            a( i, 1 ), lda )
    20             CONTINUE
 *
                ELSE
 *
 *                 Insufficient workspace for a fast algorithm
 *
                   itau = 1
                   iwork = itau + n
 *
 *                 Compute A=Q*R
 *                 (CWorkspace: need 2*N, prefer N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zgeqrf( m, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Copy R to VT, zeroing out below it
 *
                   CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
                   IF( n.GT.1 )
      $               CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            vt( 2, 1 ), ldvt )
 *
 *                 Generate Q in A
 *                 (CWorkspace: need 2*N, prefer N+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungqr( m, n, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ie = 1
                   itauq = itau
                   itaup = itauq + n
                   iwork = itaup + n
 *
 *                 Bidiagonalize R in VT
 *                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                 (RWorkspace: need N)
 *
                   CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Multiply Q in A by left vectors bidiagonalizing R
 *                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
      $                         work( itauq ), a, lda, work( iwork ),
      $                         lwork-iwork+1, ierr )
 *
 *                 Generate right vectors bidiagonalizing R in VT
 *                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   irwork = ie + n
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of A in A and computing right
 *                 singular vectors of A in VT
 *                 (CWorkspace: 0)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
      $                         ldvt, a, lda, cdum, 1, rwork( irwork ),
      $                         info )
 *
                END IF
 *
             ELSE IF( wntus ) THEN
 *
                IF( wntvn ) THEN
 *
 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
 *                 N left singular vectors to be computed in U and
 *                 no right singular vectors to be computed
 *
                   IF( lwork.GE.n*n+3*n ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      ir = 1
                      IF( lwork.GE.wrkbl+lda*n ) THEN
 *
 *                       WORK(IR) is LDA by N
 *
                         ldwrkr = lda
                      ELSE
 *
 *                       WORK(IR) is N by N
 *
                         ldwrkr = n
                      END IF
                      itau = ir + ldwrkr*n
                      iwork = itau + n
 *
 *                    Compute A=Q*R
 *                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R to WORK(IR), zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, work( ir ),
      $                            ldwrkr )
                      CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            work( ir+1 ), ldwrkr )
 *
 *                    Generate Q in A
 *                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, n, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in WORK(IR)
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, work( ir ), ldwrkr, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate left vectors bidiagonalizing R in WORK(IR)
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IR)
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
      $                            1, work( ir ), ldwrkr, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply Q in A by left singular vectors of R in
 *                    WORK(IR), storing result in U
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
      $                           work( ir ), ldwrkr, czero, u, ldu )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, n, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Zero out below R in A
 *
                      IF( n .GT. 1 ) THEN
                         CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                               a( 2, 1 ), lda )
                      END IF
 *
 *                    Bidiagonalize R in A
 *                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply Q in U by left vectors bidiagonalizing R
 *                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
      $                            work( itauq ), u, ldu, work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
      $                            1, u, ldu, cdum, 1, rwork( irwork ),
      $                            info )
 *
                   END IF
 *
                ELSE IF( wntvo ) THEN
 *
 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
 *                 N left singular vectors to be computed in U and
 *                 N right singular vectors to be overwritten on A
 *
                   IF( lwork.GE.2*n*n+3*n ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+2*lda*n ) THEN
 *
 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
 *
                         ldwrku = lda
                         ir = iu + ldwrku*n
                         ldwrkr = lda
                      ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
 *
 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
 *
                         ldwrku = lda
                         ir = iu + ldwrku*n
                         ldwrkr = n
                      ELSE
 *
 *                       WORK(IU) is N by N and WORK(IR) is N by N
 *
                         ldwrku = n
                         ir = iu + ldwrku*n
                         ldwrkr = n
                      END IF
                      itau = ir + ldwrkr*n
                      iwork = itau + n
 *
 *                    Compute A=Q*R
 *                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R to WORK(IU), zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            work( iu+1 ), ldwrku )
 *
 *                    Generate Q in A
 *                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, n, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in WORK(IU), copying result to
 *                    WORK(IR)
 *                    (CWorkspace: need   2*N*N+3*N,
 *                                 prefer 2*N*N+2*N+2*N*NB)
 *                    (RWorkspace: need   N)
 *
                      CALL zgebrd( n, n, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
      $                            work( ir ), ldwrkr )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IR)
 *                    (CWorkspace: need   2*N*N+3*N-1,
 *                                 prefer 2*N*N+2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in WORK(IR)
 *                    (CWorkspace: need 2*N*N)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
      $                            work( ir ), ldwrkr, work( iu ),
      $                            ldwrku, cdum, 1, rwork( irwork ),
      $                            info )
 *
 *                    Multiply Q in A by left singular vectors of R in
 *                    WORK(IU), storing result in U
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
      $                           work( iu ), ldwrku, czero, u, ldu )
 *
 *                    Copy right singular vectors of R to A
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
      $                            lda )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, n, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Zero out below R in A
 *
                      IF( n .GT. 1 ) THEN
                         CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                               a( 2, 1 ), lda )
                      END IF
 *
 *                    Bidiagonalize R in A
 *                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply Q in U by left vectors bidiagonalizing R
 *                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
      $                            work( itauq ), u, ldu, work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right vectors bidiagonalizing R in A
 *                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U and computing right
 *                    singular vectors of A in A
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
      $                            lda, u, ldu, cdum, 1, rwork( irwork ),
      $                            info )
 *
                   END IF
 *
                ELSE IF( wntvas ) THEN
 *
 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
 *                         or 'A')
 *                 N left singular vectors to be computed in U and
 *                 N right singular vectors to be computed in VT
 *
                   IF( lwork.GE.n*n+3*n ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+lda*n ) THEN
 *
 *                       WORK(IU) is LDA by N
 *
                         ldwrku = lda
                      ELSE
 *
 *                       WORK(IU) is N by N
 *
                         ldwrku = n
                      END IF
                      itau = iu + ldwrku*n
                      iwork = itau + n
 *
 *                    Compute A=Q*R
 *                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R to WORK(IU), zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            work( iu+1 ), ldwrku )
 *
 *                    Generate Q in A
 *                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, n, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in WORK(IU), copying result to VT
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
      $                            ldvt )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in VT
 *                    (CWorkspace: need   N*N+3*N-1,
 *                                 prefer N*N+2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in VT
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
      $                            ldvt, work( iu ), ldwrku, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply Q in A by left singular vectors of R in
 *                    WORK(IU), storing result in U
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
      $                           work( iu ), ldwrku, czero, u, ldu )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, n, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R to VT, zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
                      IF( n.GT.1 )
      $                  CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                               vt( 2, 1 ), ldvt )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in VT
 *                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in VT
 *                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
      $                            work( itauq ), u, ldu, work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in VT
 *                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U and computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
      $                            ldvt, u, ldu, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                END IF
 *
             ELSE IF( wntua ) THEN
 *
                IF( wntvn ) THEN
 *
 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
 *                 M left singular vectors to be computed in U and
 *                 no right singular vectors to be computed
 *
                   IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      ir = 1
                      IF( lwork.GE.wrkbl+lda*n ) THEN
 *
 *                       WORK(IR) is LDA by N
 *
                         ldwrkr = lda
                      ELSE
 *
 *                       WORK(IR) is N by N
 *
                         ldwrkr = n
                      END IF
                      itau = ir + ldwrkr*n
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Copy R to WORK(IR), zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, work( ir ),
      $                            ldwrkr )
                      CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            work( ir+1 ), ldwrkr )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, m, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in WORK(IR)
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, work( ir ), ldwrkr, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IR)
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IR)
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
      $                            1, work( ir ), ldwrkr, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply Q in U by left singular vectors of R in
 *                    WORK(IR), storing result in A
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
      $                           work( ir ), ldwrkr, czero, a, lda )
 *
 *                    Copy left singular vectors of A from A to U
 *
                      CALL zlacpy( 'F', m, n, a, lda, u, ldu )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need N+M, prefer N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, m, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Zero out below R in A
 *
                      IF( n .GT. 1 ) THEN
                         CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                               a( 2, 1 ), lda )
                      END IF
 *
 *                    Bidiagonalize R in A
 *                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in A
 *                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
      $                            work( itauq ), u, ldu, work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
      $                            1, u, ldu, cdum, 1, rwork( irwork ),
      $                            info )
 *
                   END IF
 *
                ELSE IF( wntvo ) THEN
 *
 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
 *                 M left singular vectors to be computed in U and
 *                 N right singular vectors to be overwritten on A
 *
                   IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+2*lda*n ) THEN
 *
 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
 *
                         ldwrku = lda
                         ir = iu + ldwrku*n
                         ldwrkr = lda
                      ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
 *
 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
 *
                         ldwrku = lda
                         ir = iu + ldwrku*n
                         ldwrkr = n
                      ELSE
 *
 *                       WORK(IU) is N by N and WORK(IR) is N by N
 *
                         ldwrku = n
                         ir = iu + ldwrku*n
                         ldwrkr = n
                      END IF
                      itau = ir + ldwrkr*n
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, m, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R to WORK(IU), zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            work( iu+1 ), ldwrku )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in WORK(IU), copying result to
 *                    WORK(IR)
 *                    (CWorkspace: need   2*N*N+3*N,
 *                                 prefer 2*N*N+2*N+2*N*NB)
 *                    (RWorkspace: need   N)
 *
                      CALL zgebrd( n, n, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
      $                            work( ir ), ldwrkr )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IR)
 *                    (CWorkspace: need   2*N*N+3*N-1,
 *                                 prefer 2*N*N+2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in WORK(IR)
 *                    (CWorkspace: need 2*N*N)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
      $                            work( ir ), ldwrkr, work( iu ),
      $                            ldwrku, cdum, 1, rwork( irwork ),
      $                            info )
 *
 *                    Multiply Q in U by left singular vectors of R in
 *                    WORK(IU), storing result in A
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
      $                           work( iu ), ldwrku, czero, a, lda )
 *
 *                    Copy left singular vectors of A from A to U
 *
                      CALL zlacpy( 'F', m, n, a, lda, u, ldu )
 *
 *                    Copy right singular vectors of R from WORK(IR) to A
 *
                      CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
      $                            lda )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need N+M, prefer N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, m, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Zero out below R in A
 *
                      IF( n .GT. 1 ) THEN
                         CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                               a( 2, 1 ), lda )
                      END IF
 *
 *                    Bidiagonalize R in A
 *                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in A
 *                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
      $                            work( itauq ), u, ldu, work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in A
 *                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U and computing right
 *                    singular vectors of A in A
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
      $                            lda, u, ldu, cdum, 1, rwork( irwork ),
      $                            info )
 *
                   END IF
 *
                ELSE IF( wntvas ) THEN
 *
 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
 *                         or 'A')
 *                 M left singular vectors to be computed in U and
 *                 N right singular vectors to be computed in VT
 *
                   IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+lda*n ) THEN
 *
 *                       WORK(IU) is LDA by N
 *
                         ldwrku = lda
                      ELSE
 *
 *                       WORK(IU) is N by N
 *
                         ldwrku = n
                      END IF
                      itau = iu + ldwrku*n
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, m, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R to WORK(IU), zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                            work( iu+1 ), ldwrku )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in WORK(IU), copying result to VT
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
      $                            ldvt )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in VT
 *                    (CWorkspace: need   N*N+3*N-1,
 *                                 prefer N*N+2*N+(N-1)*NB)
 *                    (RWorkspace: need   0)
 *
                      CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of R in WORK(IU) and computing
 *                    right singular vectors of R in VT
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
      $                            ldvt, work( iu ), ldwrku, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply Q in U by left singular vectors of R in
 *                    WORK(IU), storing result in A
 *                    (CWorkspace: need N*N)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
      $                           work( iu ), ldwrku, czero, a, lda )
 *
 *                    Copy left singular vectors of A from A to U
 *
                      CALL zlacpy( 'F', m, n, a, lda, u, ldu )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + n
 *
 *                    Compute A=Q*R, copying result to U
 *                    (CWorkspace: need 2*N, prefer N+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgeqrf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, n, a, lda, u, ldu )
 *
 *                    Generate Q in U
 *                    (CWorkspace: need N+M, prefer N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungqr( m, m, n, u, ldu, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy R from A to VT, zeroing out below it
 *
                      CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
                      IF( n.GT.1 )
      $                  CALL zlaset( 'L', n-1, n-1, czero, czero,
      $                               vt( 2, 1 ), ldvt )
                      ie = 1
                      itauq = itau
                      itaup = itauq + n
                      iwork = itaup + n
 *
 *                    Bidiagonalize R in VT
 *                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
 *                    (RWorkspace: need N)
 *
                      CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply Q in U by left bidiagonalizing vectors
 *                    in VT
 *                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
      $                            work( itauq ), u, ldu, work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in VT
 *                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + n
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U and computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
      $                            ldvt, u, ldu, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                END IF
 *
             END IF
 *
          ELSE
 *
 *           M .LT. MNTHR
 *
 *           Path 10 (M at least N, but not much larger)
 *           Reduce to bidiagonal form without QR decomposition
 *
             ie = 1
             itauq = 1
             itaup = itauq + n
             iwork = itaup + n
 *
 *           Bidiagonalize A
 *           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
 *           (RWorkspace: need N)
 *
             CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
      $                   work( itaup ), work( iwork ), lwork-iwork+1,
      $                   ierr )
             IF( wntuas ) THEN
 *
 *              If left singular vectors desired in U, copy result to U
 *              and generate left bidiagonalizing vectors in U
 *              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
 *              (RWorkspace: 0)
 *
                CALL zlacpy( 'L', m, n, a, lda, u, ldu )
                IF( wntus )
      $            ncu = n
                IF( wntua )
      $            ncu = m
                CALL zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             IF( wntvas ) THEN
 *
 *              If right singular vectors desired in VT, copy result to
 *              VT and generate right bidiagonalizing vectors in VT
 *              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *              (RWorkspace: 0)
 *
                CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
                CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             IF( wntuo ) THEN
 *
 *              If left singular vectors desired in A, generate left
 *              bidiagonalizing vectors in A
 *              (CWorkspace: need 3*N, prefer 2*N+N*NB)
 *              (RWorkspace: 0)
 *
                CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             IF( wntvo ) THEN
 *
 *              If right singular vectors desired in A, generate right
 *              bidiagonalizing vectors in A
 *              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
 *              (RWorkspace: 0)
 *
                CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             irwork = ie + n
             IF( wntuas .OR. wntuo )
      $         nru = m
             IF( wntun )
      $         nru = 0
             IF( wntvas .OR. wntvo )
      $         ncvt = n
             IF( wntvn )
      $         ncvt = 0
             IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
 *
 *              Perform bidiagonal QR iteration, if desired, computing
 *              left singular vectors in U and computing right singular
 *              vectors in VT
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
      $                      ldvt, u, ldu, cdum, 1, rwork( irwork ),
      $                      info )
             ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
 *
 *              Perform bidiagonal QR iteration, if desired, computing
 *              left singular vectors in U and computing right singular
 *              vectors in A
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
      $                      lda, u, ldu, cdum, 1, rwork( irwork ),
      $                      info )
             ELSE
 *
 *              Perform bidiagonal QR iteration, if desired, computing
 *              left singular vectors in A and computing right singular
 *              vectors in VT
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
      $                      ldvt, a, lda, cdum, 1, rwork( irwork ),
      $                      info )
             END IF
 *
          END IF
 *
       ELSE
 *
 *        A has more columns than rows. If A has sufficiently more
 *        columns than rows, first reduce using the LQ decomposition (if
 *        sufficient workspace available)
 *
          IF( n.GE.mnthr ) THEN
 *
             IF( wntvn ) THEN
 *
 *              Path 1t(N much larger than M, JOBVT='N')
 *              No right singular vectors to be computed
 *
                itau = 1
                iwork = itau + m
 *
 *              Compute A=L*Q
 *              (CWorkspace: need 2*M, prefer M+M*NB)
 *              (RWorkspace: 0)
 *
                CALL zgelqf( m, n, a, lda, work( itau ), work( iwork ),
      $                      lwork-iwork+1, ierr )
 *
 *              Zero out above L
 *
                CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
      $                      lda )
                ie = 1
                itauq = 1
                itaup = itauq + m
                iwork = itaup + m
 *
 *              Bidiagonalize L in A
 *              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *              (RWorkspace: need M)
 *
                CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
      $                      work( itaup ), work( iwork ), lwork-iwork+1,
      $                      ierr )
                IF( wntuo .OR. wntuas ) THEN
 *
 *                 If left singular vectors desired, generate Q
 *                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                END IF
                irwork = ie + m
                nru = 0
                IF( wntuo .OR. wntuas )
      $            nru = m
 *
 *              Perform bidiagonal QR iteration, computing left singular
 *              vectors of A in A if desired
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
      $                      a, lda, cdum, 1, rwork( irwork ), info )
 *
 *              If left singular vectors desired in U, copy them there
 *
                IF( wntuas )
      $            CALL zlacpy( 'F', m, m, a, lda, u, ldu )
 *
             ELSE IF( wntvo .AND. wntun ) THEN
 *
 *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
 *              M right singular vectors to be overwritten on A and
 *              no left singular vectors to be computed
 *
                IF( lwork.GE.m*m+3*m ) THEN
 *
 *                 Sufficient workspace for a fast algorithm
 *
                   ir = 1
                   IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
 *
                      ldwrku = lda
                      chunk = n
                      ldwrkr = lda
                   ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
 *
                      ldwrku = lda
                      chunk = n
                      ldwrkr = m
                   ELSE
 *
 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
 *
                      ldwrku = m
                      chunk = ( lwork-m*m ) / m
                      ldwrkr = m
                   END IF
                   itau = ir + ldwrkr*m
                   iwork = itau + m
 *
 *                 Compute A=L*Q
 *                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zgelqf( m, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Copy L to WORK(IR) and zero out above it
 *
                   CALL zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
                   CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                         work( ir+ldwrkr ), ldwrkr )
 *
 *                 Generate Q in A
 *                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zunglq( m, n, m, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ie = 1
                   itauq = itau
                   itaup = itauq + m
                   iwork = itaup + m
 *
 *                 Bidiagonalize L in WORK(IR)
 *                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
 *                 (RWorkspace: need M)
 *
                   CALL zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Generate right vectors bidiagonalizing L
 *                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
      $                         work( itaup ), work( iwork ),
      $                         lwork-iwork+1, ierr )
                   irwork = ie + m
 *
 *                 Perform bidiagonal QR iteration, computing right
 *                 singular vectors of L in WORK(IR)
 *                 (CWorkspace: need M*M)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
      $                         work( ir ), ldwrkr, cdum, 1, cdum, 1,
      $                         rwork( irwork ), info )
                   iu = itauq
 *
 *                 Multiply right singular vectors of L in WORK(IR) by Q
 *                 in A, storing result in WORK(IU) and copying to A
 *                 (CWorkspace: need M*M+M, prefer M*M+M*N)
 *                 (RWorkspace: 0)
 *
                   DO 30 i = 1, n, chunk
                      blk = min( n-i+1, chunk )
                      CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
      $                           ldwrkr, a( 1, i ), lda, czero,
      $                           work( iu ), ldwrku )
                      CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
      $                            a( 1, i ), lda )
    30             CONTINUE
 *
                ELSE
 *
 *                 Insufficient workspace for a fast algorithm
 *
                   ie = 1
                   itauq = 1
                   itaup = itauq + m
                   iwork = itaup + m
 *
 *                 Bidiagonalize A
 *                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
 *                 (RWorkspace: need M)
 *
                   CALL zgebrd( m, n, a, lda, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Generate right vectors bidiagonalizing A
 *                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   irwork = ie + m
 *
 *                 Perform bidiagonal QR iteration, computing right
 *                 singular vectors of A in A
 *                 (CWorkspace: 0)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
      $                         cdum, 1, cdum, 1, rwork( irwork ), info )
 *
                END IF
 *
             ELSE IF( wntvo .AND. wntuas ) THEN
 *
 *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
 *              M right singular vectors to be overwritten on A and
 *              M left singular vectors to be computed in U
 *
                IF( lwork.GE.m*m+3*m ) THEN
 *
 *                 Sufficient workspace for a fast algorithm
 *
                   ir = 1
                   IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
 *
                      ldwrku = lda
                      chunk = n
                      ldwrkr = lda
                   ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
 *
 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
 *
                      ldwrku = lda
                      chunk = n
                      ldwrkr = m
                   ELSE
 *
 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
 *
                      ldwrku = m
                      chunk = ( lwork-m*m ) / m
                      ldwrkr = m
                   END IF
                   itau = ir + ldwrkr*m
                   iwork = itau + m
 *
 *                 Compute A=L*Q
 *                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zgelqf( m, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Copy L to U, zeroing out above it
 *
                   CALL zlacpy( 'L', m, m, a, lda, u, ldu )
                   CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
      $                         ldu )
 *
 *                 Generate Q in A
 *                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zunglq( m, n, m, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ie = 1
                   itauq = itau
                   itaup = itauq + m
                   iwork = itaup + m
 *
 *                 Bidiagonalize L in U, copying result to WORK(IR)
 *                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
 *                 (RWorkspace: need M)
 *
                   CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   CALL zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
 *
 *                 Generate right vectors bidiagonalizing L in WORK(IR)
 *                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
      $                         work( itaup ), work( iwork ),
      $                         lwork-iwork+1, ierr )
 *
 *                 Generate left vectors bidiagonalizing L in U
 *                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   irwork = ie + m
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of L in U, and computing right
 *                 singular vectors of L in WORK(IR)
 *                 (CWorkspace: need M*M)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
      $                         work( ir ), ldwrkr, u, ldu, cdum, 1,
      $                         rwork( irwork ), info )
                   iu = itauq
 *
 *                 Multiply right singular vectors of L in WORK(IR) by Q
 *                 in A, storing result in WORK(IU) and copying to A
 *                 (CWorkspace: need M*M+M, prefer M*M+M*N)
 *                 (RWorkspace: 0)
 *
                   DO 40 i = 1, n, chunk
                      blk = min( n-i+1, chunk )
                      CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
      $                           ldwrkr, a( 1, i ), lda, czero,
      $                           work( iu ), ldwrku )
                      CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
      $                            a( 1, i ), lda )
    40             CONTINUE
 *
                ELSE
 *
 *                 Insufficient workspace for a fast algorithm
 *
                   itau = 1
                   iwork = itau + m
 *
 *                 Compute A=L*Q
 *                 (CWorkspace: need 2*M, prefer M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zgelqf( m, n, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Copy L to U, zeroing out above it
 *
                   CALL zlacpy( 'L', m, m, a, lda, u, ldu )
                   CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
      $                         ldu )
 *
 *                 Generate Q in A
 *                 (CWorkspace: need 2*M, prefer M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zunglq( m, n, m, a, lda, work( itau ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   ie = 1
                   itauq = itau
                   itaup = itauq + m
                   iwork = itaup + m
 *
 *                 Bidiagonalize L in U
 *                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                 (RWorkspace: need M)
 *
                   CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
      $                         work( itauq ), work( itaup ),
      $                         work( iwork ), lwork-iwork+1, ierr )
 *
 *                 Multiply right vectors bidiagonalizing L by Q in A
 *                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
      $                         work( itaup ), a, lda, work( iwork ),
      $                         lwork-iwork+1, ierr )
 *
 *                 Generate left vectors bidiagonalizing L in U
 *                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                 (RWorkspace: 0)
 *
                   CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
      $                         work( iwork ), lwork-iwork+1, ierr )
                   irwork = ie + m
 *
 *                 Perform bidiagonal QR iteration, computing left
 *                 singular vectors of A in U and computing right
 *                 singular vectors of A in A
 *                 (CWorkspace: 0)
 *                 (RWorkspace: need BDSPAC)
 *
                   CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
      $                         u, ldu, cdum, 1, rwork( irwork ), info )
 *
                END IF
 *
             ELSE IF( wntvs ) THEN
 *
                IF( wntun ) THEN
 *
 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
 *                 M right singular vectors to be computed in VT and
 *                 no left singular vectors to be computed
 *
                   IF( lwork.GE.m*m+3*m ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      ir = 1
                      IF( lwork.GE.wrkbl+lda*m ) THEN
 *
 *                       WORK(IR) is LDA by M
 *
                         ldwrkr = lda
                      ELSE
 *
 *                       WORK(IR) is M by M
 *
                         ldwrkr = m
                      END IF
                      itau = ir + ldwrkr*m
                      iwork = itau + m
 *
 *                    Compute A=L*Q
 *                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to WORK(IR), zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, work( ir ),
      $                            ldwrkr )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            work( ir+ldwrkr ), ldwrkr )
 *
 *                    Generate Q in A
 *                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( m, n, m, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in WORK(IR)
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, work( ir ), ldwrkr, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right vectors bidiagonalizing L in
 *                    WORK(IR)
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing right
 *                    singular vectors of L in WORK(IR)
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
      $                            work( ir ), ldwrkr, cdum, 1, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply right singular vectors of L in WORK(IR) by
 *                    Q in A, storing result in VT
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
      $                           ldwrkr, a, lda, czero, vt, ldvt )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + m
 *
 *                    Compute A=L*Q
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy result to VT
 *
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( m, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Zero out above L in A
 *
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            a( 1, 2 ), lda )
 *
 *                    Bidiagonalize L in A
 *                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply right vectors bidiagonalizing L by Q in VT
 *                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
      $                            work( itaup ), vt, ldvt,
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
      $                            ldvt, cdum, 1, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                ELSE IF( wntuo ) THEN
 *
 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
 *                 M right singular vectors to be computed in VT and
 *                 M left singular vectors to be overwritten on A
 *
                   IF( lwork.GE.2*m*m+3*m ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+2*lda*m ) THEN
 *
 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
 *
                         ldwrku = lda
                         ir = iu + ldwrku*m
                         ldwrkr = lda
                      ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
 *
 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
 *
                         ldwrku = lda
                         ir = iu + ldwrku*m
                         ldwrkr = m
                      ELSE
 *
 *                       WORK(IU) is M by M and WORK(IR) is M by M
 *
                         ldwrku = m
                         ir = iu + ldwrku*m
                         ldwrkr = m
                      END IF
                      itau = ir + ldwrkr*m
                      iwork = itau + m
 *
 *                    Compute A=L*Q
 *                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to WORK(IU), zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            work( iu+ldwrku ), ldwrku )
 *
 *                    Generate Q in A
 *                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( m, n, m, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in WORK(IU), copying result to
 *                    WORK(IR)
 *                    (CWorkspace: need   2*M*M+3*M,
 *                                 prefer 2*M*M+2*M+2*M*NB)
 *                    (RWorkspace: need   M)
 *
                      CALL zgebrd( m, m, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
      $                            work( ir ), ldwrkr )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need   2*M*M+3*M-1,
 *                                 prefer 2*M*M+2*M+(M-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IR)
 *                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in WORK(IR) and computing
 *                    right singular vectors of L in WORK(IU)
 *                    (CWorkspace: need 2*M*M)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
      $                            work( iu ), ldwrku, work( ir ),
      $                            ldwrkr, cdum, 1, rwork( irwork ),
      $                            info )
 *
 *                    Multiply right singular vectors of L in WORK(IU) by
 *                    Q in A, storing result in VT
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
      $                           ldwrku, a, lda, czero, vt, ldvt )
 *
 *                    Copy left singular vectors of L to A
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
      $                            lda )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( m, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Zero out above L in A
 *
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            a( 1, 2 ), lda )
 *
 *                    Bidiagonalize L in A
 *                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply right vectors bidiagonalizing L by Q in VT
 *                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
      $                            work( itaup ), vt, ldvt,
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors of L in A
 *                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in A and computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
      $                            ldvt, a, lda, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                ELSE IF( wntuas ) THEN
 *
 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
 *                         JOBVT='S')
 *                 M right singular vectors to be computed in VT and
 *                 M left singular vectors to be computed in U
 *
                   IF( lwork.GE.m*m+3*m ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+lda*m ) THEN
 *
 *                       WORK(IU) is LDA by M
 *
                         ldwrku = lda
                      ELSE
 *
 *                       WORK(IU) is M by M
 *
                         ldwrku = m
                      END IF
                      itau = iu + ldwrku*m
                      iwork = itau + m
 *
 *                    Compute A=L*Q
 *                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to WORK(IU), zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            work( iu+ldwrku ), ldwrku )
 *
 *                    Generate Q in A
 *                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( m, n, m, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in WORK(IU), copying result to U
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
      $                            ldu )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need   M*M+3*M-1,
 *                                 prefer M*M+2*M+(M-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in U
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in U and computing right
 *                    singular vectors of L in WORK(IU)
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
      $                            work( iu ), ldwrku, u, ldu, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply right singular vectors of L in WORK(IU) by
 *                    Q in A, storing result in VT
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
      $                           ldwrku, a, lda, czero, vt, ldvt )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( m, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to U, zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, u, ldu )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            u( 1, 2 ), ldu )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in U
 *                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply right bidiagonalizing vectors in U by Q
 *                    in VT
 *                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
      $                            work( itaup ), vt, ldvt,
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in U
 *                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U and computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
      $                            ldvt, u, ldu, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                END IF
 *
             ELSE IF( wntva ) THEN
 *
                IF( wntun ) THEN
 *
 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
 *                 N right singular vectors to be computed in VT and
 *                 no left singular vectors to be computed
 *
                   IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      ir = 1
                      IF( lwork.GE.wrkbl+lda*m ) THEN
 *
 *                       WORK(IR) is LDA by M
 *
                         ldwrkr = lda
                      ELSE
 *
 *                       WORK(IR) is M by M
 *
                         ldwrkr = m
                      END IF
                      itau = ir + ldwrkr*m
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Copy L to WORK(IR), zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, work( ir ),
      $                            ldwrkr )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            work( ir+ldwrkr ), ldwrkr )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( n, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in WORK(IR)
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, work( ir ), ldwrkr, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IR)
 *                    (CWorkspace: need   M*M+3*M-1,
 *                                 prefer M*M+2*M+(M-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing right
 *                    singular vectors of L in WORK(IR)
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
      $                            work( ir ), ldwrkr, cdum, 1, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply right singular vectors of L in WORK(IR) by
 *                    Q in VT, storing result in A
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
      $                           ldwrkr, vt, ldvt, czero, a, lda )
 *
 *                    Copy right singular vectors of A from A to VT
 *
                      CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need M+N, prefer M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( n, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Zero out above L in A
 *
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            a( 1, 2 ), lda )
 *
 *                    Bidiagonalize L in A
 *                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply right bidiagonalizing vectors in A by Q
 *                    in VT
 *                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
      $                            work( itaup ), vt, ldvt,
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
      $                            ldvt, cdum, 1, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                ELSE IF( wntuo ) THEN
 *
 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
 *                 N right singular vectors to be computed in VT and
 *                 M left singular vectors to be overwritten on A
 *
                   IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+2*lda*m ) THEN
 *
 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
 *
                         ldwrku = lda
                         ir = iu + ldwrku*m
                         ldwrkr = lda
                      ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
 *
 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
 *
                         ldwrku = lda
                         ir = iu + ldwrku*m
                         ldwrkr = m
                      ELSE
 *
 *                       WORK(IU) is M by M and WORK(IR) is M by M
 *
                         ldwrku = m
                         ir = iu + ldwrku*m
                         ldwrkr = m
                      END IF
                      itau = ir + ldwrkr*m
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( n, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to WORK(IU), zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            work( iu+ldwrku ), ldwrku )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in WORK(IU), copying result to
 *                    WORK(IR)
 *                    (CWorkspace: need   2*M*M+3*M,
 *                                 prefer 2*M*M+2*M+2*M*NB)
 *                    (RWorkspace: need   M)
 *
                      CALL zgebrd( m, m, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
      $                            work( ir ), ldwrkr )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need   2*M*M+3*M-1,
 *                                 prefer 2*M*M+2*M+(M-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in WORK(IR)
 *                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
      $                            work( itauq ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in WORK(IR) and computing
 *                    right singular vectors of L in WORK(IU)
 *                    (CWorkspace: need 2*M*M)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
      $                            work( iu ), ldwrku, work( ir ),
      $                            ldwrkr, cdum, 1, rwork( irwork ),
      $                            info )
 *
 *                    Multiply right singular vectors of L in WORK(IU) by
 *                    Q in VT, storing result in A
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
      $                           ldwrku, vt, ldvt, czero, a, lda )
 *
 *                    Copy right singular vectors of A from A to VT
 *
                      CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
 *
 *                    Copy left singular vectors of A from WORK(IR) to A
 *
                      CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
      $                            lda )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need M+N, prefer M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( n, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Zero out above L in A
 *
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            a( 1, 2 ), lda )
 *
 *                    Bidiagonalize L in A
 *                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, a, lda, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply right bidiagonalizing vectors in A by Q
 *                    in VT
 *                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
      $                            work( itaup ), vt, ldvt,
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in A
 *                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in A and computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
      $                            ldvt, a, lda, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                ELSE IF( wntuas ) THEN
 *
 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
 *                         JOBVT='A')
 *                 N right singular vectors to be computed in VT and
 *                 M left singular vectors to be computed in U
 *
                   IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
 *
 *                    Sufficient workspace for a fast algorithm
 *
                      iu = 1
                      IF( lwork.GE.wrkbl+lda*m ) THEN
 *
 *                       WORK(IU) is LDA by M
 *
                         ldwrku = lda
                      ELSE
 *
 *                       WORK(IU) is M by M
 *
                         ldwrku = m
                      END IF
                      itau = iu + ldwrku*m
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( n, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to WORK(IU), zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, work( iu ),
      $                            ldwrku )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            work( iu+ldwrku ), ldwrku )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in WORK(IU), copying result to U
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, work( iu ), ldwrku, s,
      $                            rwork( ie ), work( itauq ),
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
                      CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
      $                            ldu )
 *
 *                    Generate right bidiagonalizing vectors in WORK(IU)
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
      $                            work( itaup ), work( iwork ),
      $                            lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in U
 *                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of L in U and computing right
 *                    singular vectors of L in WORK(IU)
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
      $                            work( iu ), ldwrku, u, ldu, cdum, 1,
      $                            rwork( irwork ), info )
 *
 *                    Multiply right singular vectors of L in WORK(IU) by
 *                    Q in VT, storing result in A
 *                    (CWorkspace: need M*M)
 *                    (RWorkspace: 0)
 *
                      CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
      $                           ldwrku, vt, ldvt, czero, a, lda )
 *
 *                    Copy right singular vectors of A from A to VT
 *
                      CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
 *
                   ELSE
 *
 *                    Insufficient workspace for a fast algorithm
 *
                      itau = 1
                      iwork = itau + m
 *
 *                    Compute A=L*Q, copying result to VT
 *                    (CWorkspace: need 2*M, prefer M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zgelqf( m, n, a, lda, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
 *
 *                    Generate Q in VT
 *                    (CWorkspace: need M+N, prefer M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunglq( n, n, m, vt, ldvt, work( itau ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Copy L to U, zeroing out above it
 *
                      CALL zlacpy( 'L', m, m, a, lda, u, ldu )
                      CALL zlaset( 'U', m-1, m-1, czero, czero,
      $                            u( 1, 2 ), ldu )
                      ie = 1
                      itauq = itau
                      itaup = itauq + m
                      iwork = itaup + m
 *
 *                    Bidiagonalize L in U
 *                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
 *                    (RWorkspace: need M)
 *
                      CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
      $                            work( itauq ), work( itaup ),
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Multiply right bidiagonalizing vectors in U by Q
 *                    in VT
 *                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
      $                            work( itaup ), vt, ldvt,
      $                            work( iwork ), lwork-iwork+1, ierr )
 *
 *                    Generate left bidiagonalizing vectors in U
 *                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *                    (RWorkspace: 0)
 *
                      CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
      $                            work( iwork ), lwork-iwork+1, ierr )
                      irwork = ie + m
 *
 *                    Perform bidiagonal QR iteration, computing left
 *                    singular vectors of A in U and computing right
 *                    singular vectors of A in VT
 *                    (CWorkspace: 0)
 *                    (RWorkspace: need BDSPAC)
 *
                      CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
      $                            ldvt, u, ldu, cdum, 1,
      $                            rwork( irwork ), info )
 *
                   END IF
 *
                END IF
 *
             END IF
 *
          ELSE
 *
 *           N .LT. MNTHR
 *
 *           Path 10t(N greater than M, but not much larger)
 *           Reduce to bidiagonal form without LQ decomposition
 *
             ie = 1
             itauq = 1
             itaup = itauq + m
             iwork = itaup + m
 *
 *           Bidiagonalize A
 *           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
 *           (RWorkspace: need M)
 *
             CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
      $                   work( itaup ), work( iwork ), lwork-iwork+1,
      $                   ierr )
             IF( wntuas ) THEN
 *
 *              If left singular vectors desired in U, copy result to U
 *              and generate left bidiagonalizing vectors in U
 *              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
 *              (RWorkspace: 0)
 *
                CALL zlacpy( 'L', m, m, a, lda, u, ldu )
                CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             IF( wntvas ) THEN
 *
 *              If right singular vectors desired in VT, copy result to
 *              VT and generate right bidiagonalizing vectors in VT
 *              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
 *              (RWorkspace: 0)
 *
                CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
                IF( wntva )
      $            nrvt = n
                IF( wntvs )
      $            nrvt = m
                CALL zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             IF( wntuo ) THEN
 *
 *              If left singular vectors desired in A, generate left
 *              bidiagonalizing vectors in A
 *              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
 *              (RWorkspace: 0)
 *
                CALL zungbr( 'Q', m, m, n, a, lda, work( itauq ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             IF( wntvo ) THEN
 *
 *              If right singular vectors desired in A, generate right
 *              bidiagonalizing vectors in A
 *              (CWorkspace: need 3*M, prefer 2*M+M*NB)
 *              (RWorkspace: 0)
 *
                CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
      $                      work( iwork ), lwork-iwork+1, ierr )
             END IF
             irwork = ie + m
             IF( wntuas .OR. wntuo )
      $         nru = m
             IF( wntun )
      $         nru = 0
             IF( wntvas .OR. wntvo )
      $         ncvt = n
             IF( wntvn )
      $         ncvt = 0
             IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
 *
 *              Perform bidiagonal QR iteration, if desired, computing
 *              left singular vectors in U and computing right singular
 *              vectors in VT
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
      $                      ldvt, u, ldu, cdum, 1, rwork( irwork ),
      $                      info )
             ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
 *
 *              Perform bidiagonal QR iteration, if desired, computing
 *              left singular vectors in U and computing right singular
 *              vectors in A
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
      $                      lda, u, ldu, cdum, 1, rwork( irwork ),
      $                      info )
             ELSE
 *
 *              Perform bidiagonal QR iteration, if desired, computing
 *              left singular vectors in A and computing right singular
 *              vectors in VT
 *              (CWorkspace: 0)
 *              (RWorkspace: need BDSPAC)
 *
                CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
      $                      ldvt, a, lda, cdum, 1, rwork( irwork ),
      $                      info )
             END IF
 *
          END IF
 *
       END IF
 *
 *     Undo scaling if necessary
 *
       IF( iscl.EQ.1 ) THEN
          IF( anrm.GT.bignum )
      $      CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
      $                   ierr )
          IF( info.NE.0 .AND. anrm.GT.bignum )
      $      CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
      $                   rwork( ie ), minmn, ierr )
          IF( anrm.LT.smlnum )
      $      CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
      $                   ierr )
          IF( info.NE.0 .AND. anrm.LT.smlnum )
      $      CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
      $                   rwork( ie ), minmn, ierr )
       END IF
 *
 *     Return optimal workspace in WORK(1)
 *
       work( 1 ) = maxwrk
 *
       RETURN
 *
 *     End of ZGESVD
 *

Here is the call graph for this function:

Here is the caller graph for this function: