◆ dgesdd()

subroutine dgesdd	(	character	jobz,
		integer	m,
		integer	n,
		double precision, dimension( lda, * )	a,
		integer	lda,
		double precision, dimension( * )	s,
		double precision, dimension( ldu, * )	u,
		integer	ldu,
		double precision, dimension( ldvt, * )	vt,
		integer	ldvt,
		double precision, dimension( * )	work,
		integer	lwork,
		integer, dimension( * )	iwork,
		integer	info )

DGESDD

Download DGESDD + dependencies [TGZ] [ZIP] [TXT]

Purpose:

!>
!> DGESDD computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, optionally computing the left and right singular
!> vectors.  If singular vectors are desired, it uses a
!> divide-and-conquer algorithm.
!>
!> The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns VT = V**T, not V.
!>
!>

Parameters

[in]	JOBZ	!> JOBZ is CHARACTER*1 !> Specifies options for computing all or part of the matrix U: !> = 'A': all M columns of U and all N rows of VT are !> returned in the arrays U and VT; !> = 'S': the first min(M,N) columns of U and the first !> min(M,N) rows of VT are returned in the arrays U !> and VT; !> = 'O': If M >= N, the first N columns of U are overwritten !> on the array A and all rows of VT are returned in !> the array VT; !> otherwise, all columns of U are returned in the !> array U and the first M rows of VT are overwritten !> in the array A; !> = 'N': no columns of U or rows of V**T are computed. !>
[in]	M	!> M is INTEGER !> The number of rows of the input matrix A. M >= 0. !>
[in]	N	!> N is INTEGER !> The number of columns of the input matrix A. N >= 0. !>
[in,out]	A	!> A is DOUBLE PRECISION array, dimension (LDA,N) !> On entry, the M-by-N matrix A. !> On exit, !> if JOBZ = 'O', A is overwritten with the first N columns !> of U (the left singular vectors, stored !> columnwise) if M >= N; !> A is overwritten with the first M rows !> of V**T (the right singular vectors, stored !> rowwise) otherwise. !> if JOBZ .ne. 'O', the contents of A are destroyed. !>
[in]	LDA	!> LDA is INTEGER !> The leading dimension of the array A. LDA >= max(1,M). !>
[out]	S	!> S is DOUBLE PRECISION array, dimension (min(M,N)) !> The singular values of A, sorted so that S(i) >= S(i+1). !>
[out]	U	!> U is DOUBLE PRECISION array, dimension (LDU,UCOL) !> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N; !> UCOL = min(M,N) if JOBZ = 'S'. !> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M !> orthogonal matrix U; !> if JOBZ = 'S', U contains the first min(M,N) columns of U !> (the left singular vectors, stored columnwise); !> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced. !>
[in]	LDU	!> LDU is INTEGER !> The leading dimension of the array U. LDU >= 1; if !> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M. !>
[out]	VT	!> VT is DOUBLE PRECISION array, dimension (LDVT,N) !> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the !> N-by-N orthogonal matrix VT; !> if JOBZ = 'S', VT contains the first min(M,N) rows of !> VT (the right singular vectors, stored rowwise); !> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced. !>
[in]	LDVT	!> LDVT is INTEGER !> The leading dimension of the array VT. LDVT >= 1; !> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; !> if JOBZ = 'S', LDVT >= min(M,N). !>
[out]	WORK	!> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) !> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. !>
[in]	LWORK	!> LWORK is INTEGER !> The dimension of the array WORK. LWORK >= 1. !> If LWORK = -1, a workspace query is assumed. The optimal !> size for the WORK array is calculated and stored in WORK(1), !> and no other work except argument checking is performed. !> !> Let mx = max(M,N) and mn = min(M,N). !> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ). !> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ). !> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn. !> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx. !> These are not tight minimums in all cases; see comments inside code. !> For good performance, LWORK should generally be larger; !> a query is recommended. !>
[out]	IWORK	!> IWORK is INTEGER array, dimension (8*min(M,N)) !>
[out]	INFO	!> INFO is INTEGER !> < 0: if INFO = -i, the i-th argument had an illegal value. !> = -4: if A had a NAN entry. !> > 0: DBDSDC did not converge, updating process failed. !> = 0: successful exit. !>

Author: Univ. of Tennessee; Univ. of California Berkeley; Univ. of Colorado Denver; NAG Ltd.

Contributors: Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA

Definition at line 209 of file dgesdd.f.

      implicit none
*
*  -- LAPACK driver routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER          JOBZ
      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
*     ..
*     .. Array Arguments ..
      INTEGER            IWORK( * )
      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
     $                   VT( LDVT, * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   ZERO, ONE
      parameter( zero = 0.0d0, one = 1.0d0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
     $                   IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
     $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
     $                   MNTHR, NWORK, WRKBL
      INTEGER            LWORK_DGEBRD_MN, LWORK_DGEBRD_MM,
     $                   LWORK_DGEBRD_NN, LWORK_DGELQF_MN,
     $                   LWORK_DGEQRF_MN,
     $                   LWORK_DORGBR_P_MM, LWORK_DORGBR_Q_NN,
     $                   LWORK_DORGLQ_MN, LWORK_DORGLQ_NN,
     $                   LWORK_DORGQR_MM, LWORK_DORGQR_MN,
     $                   LWORK_DORMBR_PRT_MM, LWORK_DORMBR_QLN_MM,
     $                   LWORK_DORMBR_PRT_MN, LWORK_DORMBR_QLN_MN,
     $                   LWORK_DORMBR_PRT_NN, LWORK_DORMBR_QLN_NN
      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
*     ..
*     .. Local Arrays ..
      INTEGER            IDUM( 1 )
      DOUBLE PRECISION   DUM( 1 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           dbdsdc, dgebrd, dgelqf, dgemm, dgeqrf,
     $                   dlacpy,
     $                   dlascl, dlaset, dorgbr, dorglq, dorgqr, dormbr,
     $                   xerbla
*     ..
*     .. External Functions ..
      LOGICAL            LSAME, DISNAN
      DOUBLE PRECISION   DLAMCH, DLANGE, DROUNDUP_LWORK
      EXTERNAL           dlamch, dlange, lsame, disnan, 
     $                   droundup_lwork
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          int, max, min, sqrt
*     ..
*     .. Executable Statements ..
*
*     Test the input arguments
*
      info   = 0
      minmn  = min( m, n )
      wntqa  = lsame( jobz, 'A' )
      wntqs  = lsame( jobz, 'S' )
      wntqas = wntqa .OR. wntqs
      wntqo  = lsame( jobz, 'O' )
      wntqn  = lsame( jobz, 'N' )
      lquery = ( lwork.EQ.-1 )
*
      IF( .NOT.( wntqa .OR. wntqs .OR. wntqo .OR. wntqn ) ) THEN
         info = -1
      ELSE IF( m.LT.0 ) THEN
         info = -2
      ELSE IF( n.LT.0 ) THEN
         info = -3
      ELSE IF( lda.LT.max( 1, m ) ) THEN
         info = -5
      ELSE IF( ldu.LT.1 .OR. ( wntqas .AND. ldu.LT.m ) .OR.
     $         ( wntqo .AND. m.LT.n .AND. ldu.LT.m ) ) THEN
         info = -8
      ELSE IF( ldvt.LT.1 .OR. ( wntqa .AND. ldvt.LT.n ) .OR.
     $         ( wntqs .AND. ldvt.LT.minmn ) .OR.
     $         ( wntqo .AND. m.GE.n .AND. ldvt.LT.n ) ) THEN
         info = -10
      END IF
*
*     Compute workspace
*       Note: Comments in the code beginning "Workspace:" describe the
*       minimal amount of workspace allocated at that point in the code,
*       as well as the preferred amount for good performance.
*       NB refers to the optimal block size for the immediately
*       following subroutine, as returned by ILAENV.
*
      IF( info.EQ.0 ) THEN
         minwrk = 1
         maxwrk = 1
         bdspac = 0
         mnthr  = int( minmn*11.0d0 / 6.0d0 )
         IF( m.GE.n .AND. minmn.GT.0 ) THEN
*
*           Compute space needed for DBDSDC
*
            IF( wntqn ) THEN
*              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
*              keep 7*N for backwards compatibility.
               bdspac = 7*n
            ELSE
               bdspac = 3*n*n + 4*n
            END IF
*
*           Compute space preferred for each routine
            CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
     $                   dum(1), dum(1), -1, ierr )
            lwork_dgebrd_mn = int( dum(1) )
*
            CALL dgebrd( n, n, dum(1), n, dum(1), dum(1), dum(1),
     $                   dum(1), dum(1), -1, ierr )
            lwork_dgebrd_nn = int( dum(1) )
*
            CALL dgeqrf( m, n, dum(1), m, dum(1), dum(1), -1, ierr )
            lwork_dgeqrf_mn = int( dum(1) )
*
            CALL dorgbr( 'Q', n, n, n, dum(1), n, dum(1), dum(1), -1,
     $                   ierr )
            lwork_dorgbr_q_nn = int( dum(1) )
*
            CALL dorgqr( m, m, n, dum(1), m, dum(1), dum(1), -1,
     $                   ierr )
            lwork_dorgqr_mm = int( dum(1) )
*
            CALL dorgqr( m, n, n, dum(1), m, dum(1), dum(1), -1,
     $                   ierr )
            lwork_dorgqr_mn = int( dum(1) )
*
            CALL dormbr( 'P', 'R', 'T', n, n, n, dum(1), n,
     $                   dum(1), dum(1), n, dum(1), -1, ierr )
            lwork_dormbr_prt_nn = int( dum(1) )
*
            CALL dormbr( 'Q', 'L', 'N', n, n, n, dum(1), n,
     $                   dum(1), dum(1), n, dum(1), -1, ierr )
            lwork_dormbr_qln_nn = int( dum(1) )
*
            CALL dormbr( 'Q', 'L', 'N', m, n, n, dum(1), m,
     $                   dum(1), dum(1), m, dum(1), -1, ierr )
            lwork_dormbr_qln_mn = int( dum(1) )
*
            CALL dormbr( 'Q', 'L', 'N', m, m, n, dum(1), m,
     $                   dum(1), dum(1), m, dum(1), -1, ierr )
            lwork_dormbr_qln_mm = int( dum(1) )
*
            IF( m.GE.mnthr ) THEN
               IF( wntqn ) THEN
*
*                 Path 1 (M >> N, JOBZ='N')
*
                  wrkbl = n + lwork_dgeqrf_mn
                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
                  maxwrk = max( wrkbl, bdspac + n )
                  minwrk = bdspac + n
               ELSE IF( wntqo ) THEN
*
*                 Path 2 (M >> N, JOBZ='O')
*
                  wrkbl = n + lwork_dgeqrf_mn
                  wrkbl = max( wrkbl,   n + lwork_dorgqr_mn )
                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
                  wrkbl = max( wrkbl, 3*n + bdspac )
                  maxwrk = wrkbl + 2*n*n
                  minwrk = bdspac + 2*n*n + 3*n
               ELSE IF( wntqs ) THEN
*
*                 Path 3 (M >> N, JOBZ='S')
*
                  wrkbl = n + lwork_dgeqrf_mn
                  wrkbl = max( wrkbl,   n + lwork_dorgqr_mn )
                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
                  wrkbl = max( wrkbl, 3*n + bdspac )
                  maxwrk = wrkbl + n*n
                  minwrk = bdspac + n*n + 3*n
               ELSE IF( wntqa ) THEN
*
*                 Path 4 (M >> N, JOBZ='A')
*
                  wrkbl = n + lwork_dgeqrf_mn
                  wrkbl = max( wrkbl,   n + lwork_dorgqr_mm )
                  wrkbl = max( wrkbl, 3*n + lwork_dgebrd_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
                  wrkbl = max( wrkbl, 3*n + bdspac )
                  maxwrk = wrkbl + n*n
                  minwrk = n*n + max( 3*n + bdspac, n + m )
               END IF
            ELSE
*
*              Path 5 (M >= N, but not much larger)
*
               wrkbl = 3*n + lwork_dgebrd_mn
               IF( wntqn ) THEN
*                 Path 5n (M >= N, jobz='N')
                  maxwrk = max( wrkbl, 3*n + bdspac )
                  minwrk = 3*n + max( m, bdspac )
               ELSE IF( wntqo ) THEN
*                 Path 5o (M >= N, jobz='O')
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )
                  wrkbl = max( wrkbl, 3*n + bdspac )
                  maxwrk = wrkbl + m*n
                  minwrk = 3*n + max( m, n*n + bdspac )
               ELSE IF( wntqs ) THEN
*                 Path 5s (M >= N, jobz='S')
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mn )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
                  maxwrk = max( wrkbl, 3*n + bdspac )
                  minwrk = 3*n + max( m, bdspac )
               ELSE IF( wntqa ) THEN
*                 Path 5a (M >= N, jobz='A')
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*n + lwork_dormbr_prt_nn )
                  maxwrk = max( wrkbl, 3*n + bdspac )
                  minwrk = 3*n + max( m, bdspac )
               END IF
            END IF
         ELSE IF( minmn.GT.0 ) THEN
*
*           Compute space needed for DBDSDC
*
            IF( wntqn ) THEN
*              dbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
*              keep 7*N for backwards compatibility.
               bdspac = 7*m
            ELSE
               bdspac = 3*m*m + 4*m
            END IF
*
*           Compute space preferred for each routine
            CALL dgebrd( m, n, dum(1), m, dum(1), dum(1), dum(1),
     $                   dum(1), dum(1), -1, ierr )
            lwork_dgebrd_mn = int( dum(1) )
*
            CALL dgebrd( m, m, a, m, s, dum(1), dum(1),
     $                   dum(1), dum(1), -1, ierr )
            lwork_dgebrd_mm = int( dum(1) )
*
            CALL dgelqf( m, n, a, m, dum(1), dum(1), -1, ierr )
            lwork_dgelqf_mn = int( dum(1) )
*
            CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1,
     $                   ierr )
            lwork_dorglq_nn = int( dum(1) )
*
            CALL dorglq( m, n, m, a, m, dum(1), dum(1), -1, ierr )
            lwork_dorglq_mn = int( dum(1) )
*
            CALL dorgbr( 'P', m, m, m, a, n, dum(1), dum(1), -1,
     $                   ierr )
            lwork_dorgbr_p_mm = int( dum(1) )
*
            CALL dormbr( 'P', 'R', 'T', m, m, m, dum(1), m,
     $                   dum(1), dum(1), m, dum(1), -1, ierr )
            lwork_dormbr_prt_mm = int( dum(1) )
*
            CALL dormbr( 'P', 'R', 'T', m, n, m, dum(1), m,
     $                   dum(1), dum(1), m, dum(1), -1, ierr )
            lwork_dormbr_prt_mn = int( dum(1) )
*
            CALL dormbr( 'P', 'R', 'T', n, n, m, dum(1), n,
     $                   dum(1), dum(1), n, dum(1), -1, ierr )
            lwork_dormbr_prt_nn = int( dum(1) )
*
            CALL dormbr( 'Q', 'L', 'N', m, m, m, dum(1), m,
     $                   dum(1), dum(1), m, dum(1), -1, ierr )
            lwork_dormbr_qln_mm = int( dum(1) )
*
            IF( n.GE.mnthr ) THEN
               IF( wntqn ) THEN
*
*                 Path 1t (N >> M, JOBZ='N')
*
                  wrkbl = m + lwork_dgelqf_mn
                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
                  maxwrk = max( wrkbl, bdspac + m )
                  minwrk = bdspac + m
               ELSE IF( wntqo ) THEN
*
*                 Path 2t (N >> M, JOBZ='O')
*
                  wrkbl = m + lwork_dgelqf_mn
                  wrkbl = max( wrkbl,   m + lwork_dorglq_mn )
                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
                  wrkbl = max( wrkbl, 3*m + bdspac )
                  maxwrk = wrkbl + 2*m*m
                  minwrk = bdspac + 2*m*m + 3*m
               ELSE IF( wntqs ) THEN
*
*                 Path 3t (N >> M, JOBZ='S')
*
                  wrkbl = m + lwork_dgelqf_mn
                  wrkbl = max( wrkbl,   m + lwork_dorglq_mn )
                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
                  wrkbl = max( wrkbl, 3*m + bdspac )
                  maxwrk = wrkbl + m*m
                  minwrk = bdspac + m*m + 3*m
               ELSE IF( wntqa ) THEN
*
*                 Path 4t (N >> M, JOBZ='A')
*
                  wrkbl = m + lwork_dgelqf_mn
                  wrkbl = max( wrkbl,   m + lwork_dorglq_nn )
                  wrkbl = max( wrkbl, 3*m + lwork_dgebrd_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mm )
                  wrkbl = max( wrkbl, 3*m + bdspac )
                  maxwrk = wrkbl + m*m
                  minwrk = m*m + max( 3*m + bdspac, m + n )
               END IF
            ELSE
*
*              Path 5t (N > M, but not much larger)
*
               wrkbl = 3*m + lwork_dgebrd_mn
               IF( wntqn ) THEN
*                 Path 5tn (N > M, jobz='N')
                  maxwrk = max( wrkbl, 3*m + bdspac )
                  minwrk = 3*m + max( n, bdspac )
               ELSE IF( wntqo ) THEN
*                 Path 5to (N > M, jobz='O')
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )
                  wrkbl = max( wrkbl, 3*m + bdspac )
                  maxwrk = wrkbl + m*n
                  minwrk = 3*m + max( n, m*m + bdspac )
               ELSE IF( wntqs ) THEN
*                 Path 5ts (N > M, jobz='S')
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_mn )
                  maxwrk = max( wrkbl, 3*m + bdspac )
                  minwrk = 3*m + max( n, bdspac )
               ELSE IF( wntqa ) THEN
*                 Path 5ta (N > M, jobz='A')
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_qln_mm )
                  wrkbl = max( wrkbl, 3*m + lwork_dormbr_prt_nn )
                  maxwrk = max( wrkbl, 3*m + bdspac )
                  minwrk = 3*m + max( n, bdspac )
               END IF
            END IF
         END IF
 
         maxwrk = max( maxwrk, minwrk )
         work( 1 ) = droundup_lwork( maxwrk )
*
         IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
            info = -12
         END IF
      END IF
*
      IF( info.NE.0 ) THEN
         CALL xerbla( 'DGESDD', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( m.EQ.0 .OR. n.EQ.0 ) THEN
         RETURN
      END IF
*
*     Get machine constants
*
      eps = dlamch( 'P' )
      smlnum = sqrt( dlamch( 'S' ) ) / eps
      bignum = one / smlnum
*
*     Scale A if max element outside range [SMLNUM,BIGNUM]
*
      anrm = dlange( 'M', m, n, a, lda, dum )
      IF( disnan( anrm ) ) THEN
          info = -4
          RETURN
      END IF
      iscl = 0
      IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
         iscl = 1
         CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
      ELSE IF( anrm.GT.bignum ) THEN
         iscl = 1
         CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
      END IF
*
      IF( m.GE.n ) THEN
*
*        A has at least as many rows as columns. If A has sufficiently
*        more rows than columns, first reduce using the QR
*        decomposition (if sufficient workspace available)
*
         IF( m.GE.mnthr ) THEN
*
            IF( wntqn ) THEN
*
*              Path 1 (M >> N, JOBZ='N')
*              No singular vectors to be computed
*
               itau = 1
               nwork = itau + n
*
*              Compute A=Q*R
*              Workspace: need   N [tau] + N    [work]
*              Workspace: prefer N [tau] + N*NB [work]
*
               CALL dgeqrf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Zero out below R
*
               CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
     $                      lda )
               ie = 1
               itauq = ie + n
               itaup = itauq + n
               nwork = itaup + n
*
*              Bidiagonalize R in A
*              Workspace: need   3*N [e, tauq, taup] + N      [work]
*              Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
*
               CALL dgebrd( n, n, a, lda, s, work( ie ),
     $                      work( itauq ),
     $                      work( itaup ), work( nwork ), lwork-nwork+1,
     $                      ierr )
               nwork = ie + n
*
*              Perform bidiagonal SVD, computing singular values only
*              Workspace: need   N [e] + BDSPAC
*
               CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum,
     $                      1,
     $                      dum, idum, work( nwork ), iwork, info )
*
            ELSE IF( wntqo ) THEN
*
*              Path 2 (M >> N, JOBZ = 'O')
*              N left singular vectors to be overwritten on A and
*              N right singular vectors to be computed in VT
*
               ir = 1
*
*              WORK(IR) is LDWRKR by N
*
               IF( lwork .GE. lda*n + n*n + 3*n + bdspac ) THEN
                  ldwrkr = lda
               ELSE
                  ldwrkr = ( lwork - n*n - 3*n - bdspac ) / n
               END IF
               itau = ir + ldwrkr*n
               nwork = itau + n
*
*              Compute A=Q*R
*              Workspace: need   N*N [R] + N [tau] + N    [work]
*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
*
               CALL dgeqrf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Copy R to WORK(IR), zeroing out below it
*
               CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
               CALL dlaset( 'L', n - 1, n - 1, zero, zero,
     $                      work(ir+1),
     $                      ldwrkr )
*
*              Generate Q in A
*              Workspace: need   N*N [R] + N [tau] + N    [work]
*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
*
               CALL dorgqr( m, n, n, a, lda, work( itau ),
     $                      work( nwork ), lwork - nwork + 1, ierr )
               ie = itau
               itauq = ie + n
               itaup = itauq + n
               nwork = itaup + n
*
*              Bidiagonalize R in WORK(IR)
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]
*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
*
               CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
     $                      work( itauq ), work( itaup ), work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              WORK(IU) is N by N
*
               iu = nwork
               nwork = iu + n*n
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in WORK(IU) and computing right
*              singular vectors of bidiagonal matrix in VT
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
     $                      n,
     $                      vt, ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Overwrite WORK(IU) by left singular vectors of R
*              and VT by right singular vectors of R
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N    [work]
*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ),
     $                      ldwrkr,
     $                      work( itauq ), work( iu ), n, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ),
     $                      ldwrkr,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Multiply Q in A by left singular vectors of R in
*              WORK(IU), storing result in WORK(IR) and copying to A
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N*N [U]
*              Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
*
               DO 10 i = 1, m, ldwrkr
                  chunk = min( m - i + 1, ldwrkr )
                  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
     $                        lda, work( iu ), n, zero, work( ir ),
     $                        ldwrkr )
                  CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,
     $                         a( i, 1 ), lda )
   10          CONTINUE
*
            ELSE IF( wntqs ) THEN
*
*              Path 3 (M >> N, JOBZ='S')
*              N left singular vectors to be computed in U and
*              N right singular vectors to be computed in VT
*
               ir = 1
*
*              WORK(IR) is N by N
*
               ldwrkr = n
               itau = ir + ldwrkr*n
               nwork = itau + n
*
*              Compute A=Q*R
*              Workspace: need   N*N [R] + N [tau] + N    [work]
*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
*
               CALL dgeqrf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Copy R to WORK(IR), zeroing out below it
*
               CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
               CALL dlaset( 'L', n - 1, n - 1, zero, zero,
     $                      work(ir+1),
     $                      ldwrkr )
*
*              Generate Q in A
*              Workspace: need   N*N [R] + N [tau] + N    [work]
*              Workspace: prefer N*N [R] + N [tau] + N*NB [work]
*
               CALL dorgqr( m, n, n, a, lda, work( itau ),
     $                      work( nwork ), lwork - nwork + 1, ierr )
               ie = itau
               itauq = ie + n
               itaup = itauq + n
               nwork = itaup + n
*
*              Bidiagonalize R in WORK(IR)
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N      [work]
*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
*
               CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
     $                      work( itauq ), work( itaup ), work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagoal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
     $                      ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Overwrite U by left singular vectors of R and VT
*              by right singular vectors of R
*              Workspace: need   N*N [R] + 3*N [e, tauq, taup] + N    [work]
*              Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', n, n, n, work( ir ),
     $                      ldwrkr,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
               CALL dormbr( 'P', 'R', 'T', n, n, n, work( ir ),
     $                      ldwrkr,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Multiply Q in A by left singular vectors of R in
*              WORK(IR), storing result in U
*              Workspace: need   N*N [R]
*
               CALL dlacpy( 'F', n, n, u, ldu, work( ir ), ldwrkr )
               CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
     $                     work( ir ),
     $                     ldwrkr, zero, u, ldu )
*
            ELSE IF( wntqa ) THEN
*
*              Path 4 (M >> N, JOBZ='A')
*              M left singular vectors to be computed in U and
*              N right singular vectors to be computed in VT
*
               iu = 1
*
*              WORK(IU) is N by N
*
               ldwrku = n
               itau = iu + ldwrku*n
               nwork = itau + n
*
*              Compute A=Q*R, copying result to U
*              Workspace: need   N*N [U] + N [tau] + N    [work]
*              Workspace: prefer N*N [U] + N [tau] + N*NB [work]
*
               CALL dgeqrf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dlacpy( 'L', m, n, a, lda, u, ldu )
*
*              Generate Q in U
*              Workspace: need   N*N [U] + N [tau] + M    [work]
*              Workspace: prefer N*N [U] + N [tau] + M*NB [work]
               CALL dorgqr( m, m, n, u, ldu, work( itau ),
     $                      work( nwork ), lwork - nwork + 1, ierr )
*
*              Produce R in A, zeroing out other entries
*
               CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
     $                      lda )
               ie = itau
               itauq = ie + n
               itaup = itauq + n
               nwork = itaup + n
*
*              Bidiagonalize R in A
*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N      [work]
*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
*
               CALL dgebrd( n, n, a, lda, s, work( ie ),
     $                      work( itauq ),
     $                      work( itaup ), work( nwork ), lwork-nwork+1,
     $                      ierr )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in WORK(IU) and computing right
*              singular vectors of bidiagonal matrix in VT
*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
     $                      n,
     $                      vt, ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Overwrite WORK(IU) by left singular vectors of R and VT
*              by right singular vectors of R
*              Workspace: need   N*N [U] + 3*N [e, tauq, taup] + N    [work]
*              Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', n, n, n, a, lda,
     $                      work( itauq ), work( iu ), ldwrku,
     $                      work( nwork ), lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Multiply Q in U by left singular vectors of R in
*              WORK(IU), storing result in A
*              Workspace: need   N*N [U]
*
               CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
     $                     work( iu ),
     $                     ldwrku, zero, a, lda )
*
*              Copy left singular vectors of A from A to U
*
               CALL dlacpy( 'F', m, n, a, lda, u, ldu )
*
            END IF
*
         ELSE
*
*           M .LT. MNTHR
*
*           Path 5 (M >= N, but not much larger)
*           Reduce to bidiagonal form without QR decomposition
*
            ie = 1
            itauq = ie + n
            itaup = itauq + n
            nwork = itaup + n
*
*           Bidiagonalize A
*           Workspace: need   3*N [e, tauq, taup] + M        [work]
*           Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
*
            CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
     $                   work( itaup ), work( nwork ), lwork-nwork+1,
     $                   ierr )
            IF( wntqn ) THEN
*
*              Path 5n (M >= N, JOBZ='N')
*              Perform bidiagonal SVD, only computing singular values
*              Workspace: need   3*N [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'U', 'N', n, s, work( ie ), dum, 1, dum,
     $                      1,
     $                      dum, idum, work( nwork ), iwork, info )
            ELSE IF( wntqo ) THEN
*              Path 5o (M >= N, JOBZ='O')
               iu = nwork
               IF( lwork .GE. m*n + 3*n + bdspac ) THEN
*
*                 WORK( IU ) is M by N
*
                  ldwrku = m
                  nwork = iu + ldwrku*n
                  CALL dlaset( 'F', m, n, zero, zero, work( iu ),
     $                         ldwrku )
*                 IR is unused; silence compile warnings
                  ir = -1
               ELSE
*
*                 WORK( IU ) is N by N
*
                  ldwrku = n
                  nwork = iu + ldwrku*n
*
*                 WORK(IR) is LDWRKR by N
*
                  ir = nwork
                  ldwrkr = ( lwork - n*n - 3*n ) / n
               END IF
               nwork = iu + ldwrku*n
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in WORK(IU) and computing right
*              singular vectors of bidiagonal matrix in VT
*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', n, s, work( ie ), work( iu ),
     $                      ldwrku, vt, ldvt, dum, idum, work( nwork ),
     $                      iwork, info )
*
*              Overwrite VT by right singular vectors of A
*              Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]
*              Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
*
               CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
               IF( lwork .GE. m*n + 3*n + bdspac ) THEN
*
*                 Path 5o-fast
*                 Overwrite WORK(IU) by left singular vectors of A
*                 Workspace: need   3*N [e, tauq, taup] + M*N [U] + N    [work]
*                 Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
*
                  CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,
     $                         work( itauq ), work( iu ), ldwrku,
     $                         work( nwork ), lwork - nwork + 1, ierr )
*
*                 Copy left singular vectors of A from WORK(IU) to A
*
                  CALL dlacpy( 'F', m, n, work( iu ), ldwrku, a,
     $                         lda )
               ELSE
*
*                 Path 5o-slow
*                 Generate Q in A
*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + N    [work]
*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
*
                  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
     $                         work( nwork ), lwork - nwork + 1, ierr )
*
*                 Multiply Q in A by left singular vectors of
*                 bidiagonal matrix in WORK(IU), storing result in
*                 WORK(IR) and copying to A
*                 Workspace: need   3*N [e, tauq, taup] + N*N [U] + NB*N [R]
*                 Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N  [R]
*
                  DO 20 i = 1, m, ldwrkr
                     chunk = min( m - i + 1, ldwrkr )
                     CALL dgemm( 'N', 'N', chunk, n, n, one, a( i,
     $                           1 ),
     $                           lda, work( iu ), ldwrku, zero,
     $                           work( ir ), ldwrkr )
                     CALL dlacpy( 'F', chunk, n, work( ir ), ldwrkr,
     $                            a( i, 1 ), lda )
   20             CONTINUE
               END IF
*
            ELSE IF( wntqs ) THEN
*
*              Path 5s (M >= N, JOBZ='S')
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              Workspace: need   3*N [e, tauq, taup] + BDSPAC
*
               CALL dlaset( 'F', m, n, zero, zero, u, ldu )
               CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
     $                      ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              Workspace: need   3*N [e, tauq, taup] + N    [work]
*              Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, n, n, a, lda,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', n, n, n, a, lda,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
            ELSE IF( wntqa ) THEN
*
*              Path 5a (M >= N, JOBZ='A')
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              Workspace: need   3*N [e, tauq, taup] + BDSPAC
*
               CALL dlaset( 'F', m, m, zero, zero, u, ldu )
               CALL dbdsdc( 'U', 'I', n, s, work( ie ), u, ldu, vt,
     $                      ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Set the lower-right (M-N)-by-(M-N) block of U to identity
*
               IF( m.GT.n ) THEN
                  CALL dlaset( 'F', m - n, m - n, zero, one, u(n+1,
     $                         n+1),
     $                         ldu )
               END IF
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              Workspace: need   3*N [e, tauq, taup] + M    [work]
*              Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
            END IF
*
         END IF
*
      ELSE
*
*        A has more columns than rows. If A has sufficiently more
*        columns than rows, first reduce using the LQ decomposition (if
*        sufficient workspace available)
*
         IF( n.GE.mnthr ) THEN
*
            IF( wntqn ) THEN
*
*              Path 1t (N >> M, JOBZ='N')
*              No singular vectors to be computed
*
               itau = 1
               nwork = itau + m
*
*              Compute A=L*Q
*              Workspace: need   M [tau] + M [work]
*              Workspace: prefer M [tau] + M*NB [work]
*
               CALL dgelqf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Zero out above L
*
               CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
     $                      lda )
               ie = 1
               itauq = ie + m
               itaup = itauq + m
               nwork = itaup + m
*
*              Bidiagonalize L in A
*              Workspace: need   3*M [e, tauq, taup] + M      [work]
*              Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
*
               CALL dgebrd( m, m, a, lda, s, work( ie ),
     $                      work( itauq ),
     $                      work( itaup ), work( nwork ), lwork-nwork+1,
     $                      ierr )
               nwork = ie + m
*
*              Perform bidiagonal SVD, computing singular values only
*              Workspace: need   M [e] + BDSPAC
*
               CALL dbdsdc( 'U', 'N', m, s, work( ie ), dum, 1, dum,
     $                      1,
     $                      dum, idum, work( nwork ), iwork, info )
*
            ELSE IF( wntqo ) THEN
*
*              Path 2t (N >> M, JOBZ='O')
*              M right singular vectors to be overwritten on A and
*              M left singular vectors to be computed in U
*
               ivt = 1
*
*              WORK(IVT) is M by M
*              WORK(IL)  is M by M; it is later resized to M by chunk for gemm
*
               il = ivt + m*m
               IF( lwork .GE. m*n + m*m + 3*m + bdspac ) THEN
                  ldwrkl = m
                  chunk = n
               ELSE
                  ldwrkl = m
                  chunk = ( lwork - m*m ) / m
               END IF
               itau = il + ldwrkl*m
               nwork = itau + m
*
*              Compute A=L*Q
*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
*
               CALL dgelqf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Copy L to WORK(IL), zeroing out above it
*
               CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
               CALL dlaset( 'U', m - 1, m - 1, zero, zero,
     $                      work( il + ldwrkl ), ldwrkl )
*
*              Generate Q in A
*              Workspace: need   M*M [VT] + M*M [L] + M [tau] + M    [work]
*              Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
*
               CALL dorglq( m, n, m, a, lda, work( itau ),
     $                      work( nwork ), lwork - nwork + 1, ierr )
               ie = itau
               itauq = ie + m
               itaup = itauq + m
               nwork = itaup + m
*
*              Bidiagonalize L in WORK(IL)
*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M      [work]
*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
*
               CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
     $                      work( itauq ), work( itaup ), work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U, and computing right singular
*              vectors of bidiagonal matrix in WORK(IVT)
*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
     $                      work( ivt ), m, dum, idum, work( nwork ),
     $                      iwork, info )
*
*              Overwrite U by left singular vectors of L and WORK(IVT)
*              by right singular vectors of L
*              Workspace: need   M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M    [work]
*              Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ),
     $                      ldwrkl,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ),
     $                      ldwrkl,
     $                      work( itaup ), work( ivt ), m,
     $                      work( nwork ), lwork - nwork + 1, ierr )
*
*              Multiply right singular vectors of L in WORK(IVT) by Q
*              in A, storing result in WORK(IL) and copying to A
*              Workspace: need   M*M [VT] + M*M [L]
*              Workspace: prefer M*M [VT] + M*N [L]
*              At this point, L is resized as M by chunk.
*
               DO 30 i = 1, n, chunk
                  blk = min( n - i + 1, chunk )
                  CALL dgemm( 'N', 'N', m, blk, m, one, work( ivt ),
     $                        m,
     $                        a( 1, i ), lda, zero, work( il ), ldwrkl )
                  CALL dlacpy( 'F', m, blk, work( il ), ldwrkl,
     $                         a( 1, i ), lda )
   30          CONTINUE
*
            ELSE IF( wntqs ) THEN
*
*              Path 3t (N >> M, JOBZ='S')
*              M right singular vectors to be computed in VT and
*              M left singular vectors to be computed in U
*
               il = 1
*
*              WORK(IL) is M by M
*
               ldwrkl = m
               itau = il + ldwrkl*m
               nwork = itau + m
*
*              Compute A=L*Q
*              Workspace: need   M*M [L] + M [tau] + M    [work]
*              Workspace: prefer M*M [L] + M [tau] + M*NB [work]
*
               CALL dgelqf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Copy L to WORK(IL), zeroing out above it
*
               CALL dlacpy( 'L', m, m, a, lda, work( il ), ldwrkl )
               CALL dlaset( 'U', m - 1, m - 1, zero, zero,
     $                      work( il + ldwrkl ), ldwrkl )
*
*              Generate Q in A
*              Workspace: need   M*M [L] + M [tau] + M    [work]
*              Workspace: prefer M*M [L] + M [tau] + M*NB [work]
*
               CALL dorglq( m, n, m, a, lda, work( itau ),
     $                      work( nwork ), lwork - nwork + 1, ierr )
               ie = itau
               itauq = ie + m
               itaup = itauq + m
               nwork = itaup + m
*
*              Bidiagonalize L in WORK(IL).
*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M      [work]
*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
*
               CALL dgebrd( m, m, work( il ), ldwrkl, s, work( ie ),
     $                      work( itauq ), work( itaup ), work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu, vt,
     $                      ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Overwrite U by left singular vectors of L and VT
*              by right singular vectors of L
*              Workspace: need   M*M [L] + 3*M [e, tauq, taup] + M    [work]
*              Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, m, work( il ),
     $                      ldwrkl,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', m, m, m, work( il ),
     $                      ldwrkl,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
*              Multiply right singular vectors of L in WORK(IL) by
*              Q in A, storing result in VT
*              Workspace: need   M*M [L]
*
               CALL dlacpy( 'F', m, m, vt, ldvt, work( il ), ldwrkl )
               CALL dgemm( 'N', 'N', m, n, m, one, work( il ),
     $                     ldwrkl,
     $                     a, lda, zero, vt, ldvt )
*
            ELSE IF( wntqa ) THEN
*
*              Path 4t (N >> M, JOBZ='A')
*              N right singular vectors to be computed in VT and
*              M left singular vectors to be computed in U
*
               ivt = 1
*
*              WORK(IVT) is M by M
*
               ldwkvt = m
               itau = ivt + ldwkvt*m
               nwork = itau + m
*
*              Compute A=L*Q, copying result to VT
*              Workspace: need   M*M [VT] + M [tau] + M    [work]
*              Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
*
               CALL dgelqf( m, n, a, lda, work( itau ),
     $                      work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
*
*              Generate Q in VT
*              Workspace: need   M*M [VT] + M [tau] + N    [work]
*              Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
*
               CALL dorglq( n, n, m, vt, ldvt, work( itau ),
     $                      work( nwork ), lwork - nwork + 1, ierr )
*
*              Produce L in A, zeroing out other entries
*
               CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
     $                      lda )
               ie = itau
               itauq = ie + m
               itaup = itauq + m
               nwork = itaup + m
*
*              Bidiagonalize L in A
*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + M      [work]
*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
*
               CALL dgebrd( m, m, a, lda, s, work( ie ),
     $                      work( itauq ),
     $                      work( itaup ), work( nwork ), lwork-nwork+1,
     $                      ierr )
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in WORK(IVT)
*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'U', 'I', m, s, work( ie ), u, ldu,
     $                      work( ivt ), ldwkvt, dum, idum,
     $                      work( nwork ), iwork, info )
*
*              Overwrite U by left singular vectors of L and WORK(IVT)
*              by right singular vectors of L
*              Workspace: need   M*M [VT] + 3*M [e, tauq, taup]+ M    [work]
*              Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, m, a, lda,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', m, m, m, a, lda,
     $                      work( itaup ), work( ivt ), ldwkvt,
     $                      work( nwork ), lwork - nwork + 1, ierr )
*
*              Multiply right singular vectors of L in WORK(IVT) by
*              Q in VT, storing result in A
*              Workspace: need   M*M [VT]
*
               CALL dgemm( 'N', 'N', m, n, m, one, work( ivt ),
     $                     ldwkvt,
     $                     vt, ldvt, zero, a, lda )
*
*              Copy right singular vectors of A from A to VT
*
               CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
*
            END IF
*
         ELSE
*
*           N .LT. MNTHR
*
*           Path 5t (N > M, but not much larger)
*           Reduce to bidiagonal form without LQ decomposition
*
            ie = 1
            itauq = ie + m
            itaup = itauq + m
            nwork = itaup + m
*
*           Bidiagonalize A
*           Workspace: need   3*M [e, tauq, taup] + N        [work]
*           Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
*
            CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
     $                   work( itaup ), work( nwork ), lwork-nwork+1,
     $                   ierr )
            IF( wntqn ) THEN
*
*              Path 5tn (N > M, JOBZ='N')
*              Perform bidiagonal SVD, only computing singular values
*              Workspace: need   3*M [e, tauq, taup] + BDSPAC
*
               CALL dbdsdc( 'L', 'N', m, s, work( ie ), dum, 1, dum,
     $                      1,
     $                      dum, idum, work( nwork ), iwork, info )
            ELSE IF( wntqo ) THEN
*              Path 5to (N > M, JOBZ='O')
               ldwkvt = m
               ivt = nwork
               IF( lwork .GE. m*n + 3*m + bdspac ) THEN
*
*                 WORK( IVT ) is M by N
*
                  CALL dlaset( 'F', m, n, zero, zero, work( ivt ),
     $                         ldwkvt )
                  nwork = ivt + ldwkvt*n
*                 IL is unused; silence compile warnings
                  il = -1
               ELSE
*
*                 WORK( IVT ) is M by M
*
                  nwork = ivt + ldwkvt*m
                  il = nwork
*
*                 WORK(IL) is M by CHUNK
*
                  chunk = ( lwork - m*m - 3*m ) / m
               END IF
*
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in WORK(IVT)
*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + BDSPAC
*
               CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu,
     $                      work( ivt ), ldwkvt, dum, idum,
     $                      work( nwork ), iwork, info )
*
*              Overwrite U by left singular vectors of A
*              Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]
*              Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
*
               IF( lwork .GE. m*n + 3*m + bdspac ) THEN
*
*                 Path 5to-fast
*                 Overwrite WORK(IVT) by right singular vectors of A
*                 Workspace: need   3*M [e, tauq, taup] + M*N [VT] + M    [work]
*                 Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
*
                  CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,
     $                         work( itaup ), work( ivt ), ldwkvt,
     $                         work( nwork ), lwork - nwork + 1, ierr )
*
*                 Copy right singular vectors of A from WORK(IVT) to A
*
                  CALL dlacpy( 'F', m, n, work( ivt ), ldwkvt, a,
     $                         lda )
               ELSE
*
*                 Path 5to-slow
*                 Generate P**T in A
*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M    [work]
*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
*
                  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
     $                         work( nwork ), lwork - nwork + 1, ierr )
*
*                 Multiply right singular vectors of bidiagonal
*                 matrix in WORK(IVT) by P**T in A, storing result in
*                 WORK(IL) and copying to A
*                 Workspace: need   3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
*                 Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N  [L]
*
                  DO 40 i = 1, n, chunk
                     blk = min( n - i + 1, chunk )
                     CALL dgemm( 'N', 'N', m, blk, m, one,
     $                           work( ivt ),
     $                           ldwkvt, a( 1, i ), lda, zero,
     $                           work( il ), m )
                     CALL dlacpy( 'F', m, blk, work( il ), m, a( 1,
     $                            i ),
     $                            lda )
   40             CONTINUE
               END IF
            ELSE IF( wntqs ) THEN
*
*              Path 5ts (N > M, JOBZ='S')
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              Workspace: need   3*M [e, tauq, taup] + BDSPAC
*
               CALL dlaset( 'F', m, n, zero, zero, vt, ldvt )
               CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
     $                      ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              Workspace: need   3*M [e, tauq, taup] + M    [work]
*              Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', m, n, m, a, lda,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
            ELSE IF( wntqa ) THEN
*
*              Path 5ta (N > M, JOBZ='A')
*              Perform bidiagonal SVD, computing left singular vectors
*              of bidiagonal matrix in U and computing right singular
*              vectors of bidiagonal matrix in VT
*              Workspace: need   3*M [e, tauq, taup] + BDSPAC
*
               CALL dlaset( 'F', n, n, zero, zero, vt, ldvt )
               CALL dbdsdc( 'L', 'I', m, s, work( ie ), u, ldu, vt,
     $                      ldvt, dum, idum, work( nwork ), iwork,
     $                      info )
*
*              Set the lower-right (N-M)-by-(N-M) block of VT to identity
*
               IF( n.GT.m ) THEN
                  CALL dlaset( 'F', n-m, n-m, zero, one, vt(m+1,m+1),
     $                         ldvt )
               END IF
*
*              Overwrite U by left singular vectors of A and VT
*              by right singular vectors of A
*              Workspace: need   3*M [e, tauq, taup] + N    [work]
*              Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
*
               CALL dormbr( 'Q', 'L', 'N', m, m, n, a, lda,
     $                      work( itauq ), u, ldu, work( nwork ),
     $                      lwork - nwork + 1, ierr )
               CALL dormbr( 'P', 'R', 'T', n, n, m, a, lda,
     $                      work( itaup ), vt, ldvt, work( nwork ),
     $                      lwork - nwork + 1, ierr )
            END IF
*
         END IF
*
      END IF
*
*     Undo scaling if necessary
*
      IF( iscl.EQ.1 ) THEN
         IF( anrm.GT.bignum )
     $      CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
     $                   ierr )
         IF( anrm.LT.smlnum )
     $      CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
     $                   ierr )
      END IF
*
*     Return optimal workspace in WORK(1)
*
      work( 1 ) = droundup_lwork( maxwrk )
*
      RETURN
*
*     End of DGESDD
*

Here is the call graph for this function:

Here is the caller graph for this function: