LAPACK 3.3.1
Linear Algebra PACKage

sgesvd.f

Go to the documentation of this file.
00001       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
00002      $                   WORK, LWORK, INFO )
00003 *
00004 *  -- LAPACK driver routine (version 3.3.1) --
00005 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
00006 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
00007 *  -- April 2011                                                      --
00008 *
00009 *     .. Scalar Arguments ..
00010       CHARACTER          JOBU, JOBVT
00011       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
00012 *     ..
00013 *     .. Array Arguments ..
00014       REAL               A( LDA, * ), S( * ), U( LDU, * ),
00015      $                   VT( LDVT, * ), WORK( * )
00016 *     ..
00017 *
00018 *  Purpose
00019 *  =======
00020 *
00021 *  SGESVD computes the singular value decomposition (SVD) of a real
00022 *  M-by-N matrix A, optionally computing the left and/or right singular
00023 *  vectors. The SVD is written
00024 *
00025 *       A = U * SIGMA * transpose(V)
00026 *
00027 *  where SIGMA is an M-by-N matrix which is zero except for its
00028 *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
00029 *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
00030 *  are the singular values of A; they are real and non-negative, and
00031 *  are returned in descending order.  The first min(m,n) columns of
00032 *  U and V are the left and right singular vectors of A.
00033 *
00034 *  Note that the routine returns V**T, not V.
00035 *
00036 *  Arguments
00037 *  =========
00038 *
00039 *  JOBU    (input) CHARACTER*1
00040 *          Specifies options for computing all or part of the matrix U:
00041 *          = 'A':  all M columns of U are returned in array U:
00042 *          = 'S':  the first min(m,n) columns of U (the left singular
00043 *                  vectors) are returned in the array U;
00044 *          = 'O':  the first min(m,n) columns of U (the left singular
00045 *                  vectors) are overwritten on the array A;
00046 *          = 'N':  no columns of U (no left singular vectors) are
00047 *                  computed.
00048 *
00049 *  JOBVT   (input) CHARACTER*1
00050 *          Specifies options for computing all or part of the matrix
00051 *          V**T:
00052 *          = 'A':  all N rows of V**T are returned in the array VT;
00053 *          = 'S':  the first min(m,n) rows of V**T (the right singular
00054 *                  vectors) are returned in the array VT;
00055 *          = 'O':  the first min(m,n) rows of V**T (the right singular
00056 *                  vectors) are overwritten on the array A;
00057 *          = 'N':  no rows of V**T (no right singular vectors) are
00058 *                  computed.
00059 *
00060 *          JOBVT and JOBU cannot both be 'O'.
00061 *
00062 *  M       (input) INTEGER
00063 *          The number of rows of the input matrix A.  M >= 0.
00064 *
00065 *  N       (input) INTEGER
00066 *          The number of columns of the input matrix A.  N >= 0.
00067 *
00068 *  A       (input/output) REAL array, dimension (LDA,N)
00069 *          On entry, the M-by-N matrix A.
00070 *          On exit,
00071 *          if JOBU = 'O',  A is overwritten with the first min(m,n)
00072 *                          columns of U (the left singular vectors,
00073 *                          stored columnwise);
00074 *          if JOBVT = 'O', A is overwritten with the first min(m,n)
00075 *                          rows of V**T (the right singular vectors,
00076 *                          stored rowwise);
00077 *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
00078 *                          are destroyed.
00079 *
00080 *  LDA     (input) INTEGER
00081 *          The leading dimension of the array A.  LDA >= max(1,M).
00082 *
00083 *  S       (output) REAL array, dimension (min(M,N))
00084 *          The singular values of A, sorted so that S(i) >= S(i+1).
00085 *
00086 *  U       (output) REAL array, dimension (LDU,UCOL)
00087 *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
00088 *          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
00089 *          if JOBU = 'S', U contains the first min(m,n) columns of U
00090 *          (the left singular vectors, stored columnwise);
00091 *          if JOBU = 'N' or 'O', U is not referenced.
00092 *
00093 *  LDU     (input) INTEGER
00094 *          The leading dimension of the array U.  LDU >= 1; if
00095 *          JOBU = 'S' or 'A', LDU >= M.
00096 *
00097 *  VT      (output) REAL array, dimension (LDVT,N)
00098 *          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
00099 *          V**T;
00100 *          if JOBVT = 'S', VT contains the first min(m,n) rows of
00101 *          V**T (the right singular vectors, stored rowwise);
00102 *          if JOBVT = 'N' or 'O', VT is not referenced.
00103 *
00104 *  LDVT    (input) INTEGER
00105 *          The leading dimension of the array VT.  LDVT >= 1; if
00106 *          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
00107 *
00108 *  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
00109 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
00110 *          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
00111 *          superdiagonal elements of an upper bidiagonal matrix B
00112 *          whose diagonal is in S (not necessarily sorted). B
00113 *          satisfies A = U * B * VT, so it has the same singular values
00114 *          as A, and singular vectors related by U and VT.
00115 *
00116 *  LWORK   (input) INTEGER
00117 *          The dimension of the array WORK.
00118 *          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
00119 *             - PATH 1  (M much larger than N, JOBU='N') 
00120 *             - PATH 1t (N much larger than M, JOBVT='N')
00121 *          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
00122 *          For good performance, LWORK should generally be larger.
00123 *
00124 *          If LWORK = -1, then a workspace query is assumed; the routine
00125 *          only calculates the optimal size of the WORK array, returns
00126 *          this value as the first entry of the WORK array, and no error
00127 *          message related to LWORK is issued by XERBLA.
00128 *
00129 *  INFO    (output) INTEGER
00130 *          = 0:  successful exit.
00131 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
00132 *          > 0:  if SBDSQR did not converge, INFO specifies how many
00133 *                superdiagonals of an intermediate bidiagonal form B
00134 *                did not converge to zero. See the description of WORK
00135 *                above for details.
00136 *
00137 *  =====================================================================
00138 *
00139 *     .. Parameters ..
00140       REAL               ZERO, ONE
00141       PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
00142 *     ..
00143 *     .. Local Scalars ..
00144       LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
00145      $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
00146       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
00147      $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
00148      $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
00149      $                   NRVT, WRKBL
00150       REAL               ANRM, BIGNUM, EPS, SMLNUM
00151 *     ..
00152 *     .. Local Arrays ..
00153       REAL               DUM( 1 )
00154 *     ..
00155 *     .. External Subroutines ..
00156       EXTERNAL           SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
00157      $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
00158      $                   XERBLA
00159 *     ..
00160 *     .. External Functions ..
00161       LOGICAL            LSAME
00162       INTEGER            ILAENV
00163       REAL               SLAMCH, SLANGE
00164       EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
00165 *     ..
00166 *     .. Intrinsic Functions ..
00167       INTRINSIC          MAX, MIN, SQRT
00168 *     ..
00169 *     .. Executable Statements ..
00170 *
00171 *     Test the input arguments
00172 *
00173       INFO = 0
00174       MINMN = MIN( M, N )
00175       WNTUA = LSAME( JOBU, 'A' )
00176       WNTUS = LSAME( JOBU, 'S' )
00177       WNTUAS = WNTUA .OR. WNTUS
00178       WNTUO = LSAME( JOBU, 'O' )
00179       WNTUN = LSAME( JOBU, 'N' )
00180       WNTVA = LSAME( JOBVT, 'A' )
00181       WNTVS = LSAME( JOBVT, 'S' )
00182       WNTVAS = WNTVA .OR. WNTVS
00183       WNTVO = LSAME( JOBVT, 'O' )
00184       WNTVN = LSAME( JOBVT, 'N' )
00185       LQUERY = ( LWORK.EQ.-1 )
00186 *
00187       IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
00188          INFO = -1
00189       ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
00190      $         ( WNTVO .AND. WNTUO ) ) THEN
00191          INFO = -2
00192       ELSE IF( M.LT.0 ) THEN
00193          INFO = -3
00194       ELSE IF( N.LT.0 ) THEN
00195          INFO = -4
00196       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
00197          INFO = -6
00198       ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
00199          INFO = -9
00200       ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
00201      $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
00202          INFO = -11
00203       END IF
00204 *
00205 *     Compute workspace
00206 *      (Note: Comments in the code beginning "Workspace:" describe the
00207 *       minimal amount of workspace needed at that point in the code,
00208 *       as well as the preferred amount for good performance.
00209 *       NB refers to the optimal block size for the immediately
00210 *       following subroutine, as returned by ILAENV.)
00211 *
00212       IF( INFO.EQ.0 ) THEN
00213          MINWRK = 1
00214          MAXWRK = 1
00215          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
00216 *
00217 *           Compute space needed for SBDSQR
00218 *
00219             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
00220             BDSPAC = 5*N
00221             IF( M.GE.MNTHR ) THEN
00222                IF( WNTUN ) THEN
00223 *
00224 *                 Path 1 (M much larger than N, JOBU='N')
00225 *
00226                   MAXWRK = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1,
00227      $                     -1 )
00228                   MAXWRK = MAX( MAXWRK, 3*N+2*N*
00229      $                     ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00230                   IF( WNTVO .OR. WNTVAS )
00231      $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
00232      $                        ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00233                   MAXWRK = MAX( MAXWRK, BDSPAC )
00234                   MINWRK = MAX( 4*N, BDSPAC )
00235                ELSE IF( WNTUO .AND. WNTVN ) THEN
00236 *
00237 *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
00238 *
00239                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00240                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
00241      $                    N, N, -1 ) )
00242                   WRKBL = MAX( WRKBL, 3*N+2*N*
00243      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00244                   WRKBL = MAX( WRKBL, 3*N+N*
00245      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00246                   WRKBL = MAX( WRKBL, BDSPAC )
00247                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
00248                   MINWRK = MAX( 3*N+M, BDSPAC )
00249                ELSE IF( WNTUO .AND. WNTVAS ) THEN
00250 *
00251 *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
00252 *                 'A')
00253 *
00254                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00255                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
00256      $                    N, N, -1 ) )
00257                   WRKBL = MAX( WRKBL, 3*N+2*N*
00258      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00259                   WRKBL = MAX( WRKBL, 3*N+N*
00260      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00261                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
00262      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00263                   WRKBL = MAX( WRKBL, BDSPAC )
00264                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
00265                   MINWRK = MAX( 3*N+M, BDSPAC )
00266                ELSE IF( WNTUS .AND. WNTVN ) THEN
00267 *
00268 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
00269 *
00270                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00271                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
00272      $                    N, N, -1 ) )
00273                   WRKBL = MAX( WRKBL, 3*N+2*N*
00274      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00275                   WRKBL = MAX( WRKBL, 3*N+N*
00276      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00277                   WRKBL = MAX( WRKBL, BDSPAC )
00278                   MAXWRK = N*N + WRKBL
00279                   MINWRK = MAX( 3*N+M, BDSPAC )
00280                ELSE IF( WNTUS .AND. WNTVO ) THEN
00281 *
00282 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
00283 *
00284                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00285                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
00286      $                    N, N, -1 ) )
00287                   WRKBL = MAX( WRKBL, 3*N+2*N*
00288      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00289                   WRKBL = MAX( WRKBL, 3*N+N*
00290      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00291                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
00292      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00293                   WRKBL = MAX( WRKBL, BDSPAC )
00294                   MAXWRK = 2*N*N + WRKBL
00295                   MINWRK = MAX( 3*N+M, BDSPAC )
00296                ELSE IF( WNTUS .AND. WNTVAS ) THEN
00297 *
00298 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
00299 *                 'A')
00300 *
00301                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00302                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
00303      $                    N, N, -1 ) )
00304                   WRKBL = MAX( WRKBL, 3*N+2*N*
00305      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00306                   WRKBL = MAX( WRKBL, 3*N+N*
00307      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00308                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
00309      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00310                   WRKBL = MAX( WRKBL, BDSPAC )
00311                   MAXWRK = N*N + WRKBL
00312                   MINWRK = MAX( 3*N+M, BDSPAC )
00313                ELSE IF( WNTUA .AND. WNTVN ) THEN
00314 *
00315 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
00316 *
00317                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00318                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
00319      $                    M, N, -1 ) )
00320                   WRKBL = MAX( WRKBL, 3*N+2*N*
00321      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00322                   WRKBL = MAX( WRKBL, 3*N+N*
00323      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00324                   WRKBL = MAX( WRKBL, BDSPAC )
00325                   MAXWRK = N*N + WRKBL
00326                   MINWRK = MAX( 3*N+M, BDSPAC )
00327                ELSE IF( WNTUA .AND. WNTVO ) THEN
00328 *
00329 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
00330 *
00331                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00332                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
00333      $                    M, N, -1 ) )
00334                   WRKBL = MAX( WRKBL, 3*N+2*N*
00335      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00336                   WRKBL = MAX( WRKBL, 3*N+N*
00337      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00338                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
00339      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00340                   WRKBL = MAX( WRKBL, BDSPAC )
00341                   MAXWRK = 2*N*N + WRKBL
00342                   MINWRK = MAX( 3*N+M, BDSPAC )
00343                ELSE IF( WNTUA .AND. WNTVAS ) THEN
00344 *
00345 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
00346 *                 'A')
00347 *
00348                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
00349                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
00350      $                    M, N, -1 ) )
00351                   WRKBL = MAX( WRKBL, 3*N+2*N*
00352      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
00353                   WRKBL = MAX( WRKBL, 3*N+N*
00354      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
00355                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
00356      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00357                   WRKBL = MAX( WRKBL, BDSPAC )
00358                   MAXWRK = N*N + WRKBL
00359                   MINWRK = MAX( 3*N+M, BDSPAC )
00360                END IF
00361             ELSE
00362 *
00363 *              Path 10 (M at least N, but not much larger)
00364 *
00365                MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
00366      $                  -1, -1 )
00367                IF( WNTUS .OR. WNTUO )
00368      $            MAXWRK = MAX( MAXWRK, 3*N+N*
00369      $                     ILAENV( 1, 'SORGBR', 'Q', M, N, N, -1 ) )
00370                IF( WNTUA )
00371      $            MAXWRK = MAX( MAXWRK, 3*N+M*
00372      $                     ILAENV( 1, 'SORGBR', 'Q', M, M, N, -1 ) )
00373                IF( .NOT.WNTVN )
00374      $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
00375      $                     ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
00376                MAXWRK = MAX( MAXWRK, BDSPAC )
00377                MINWRK = MAX( 3*N+M, BDSPAC )
00378             END IF
00379          ELSE IF( MINMN.GT.0 ) THEN
00380 *
00381 *           Compute space needed for SBDSQR
00382 *
00383             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
00384             BDSPAC = 5*M
00385             IF( N.GE.MNTHR ) THEN
00386                IF( WNTVN ) THEN
00387 *
00388 *                 Path 1t(N much larger than M, JOBVT='N')
00389 *
00390                   MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1,
00391      $                     -1 )
00392                   MAXWRK = MAX( MAXWRK, 3*M+2*M*
00393      $                     ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00394                   IF( WNTUO .OR. WNTUAS )
00395      $               MAXWRK = MAX( MAXWRK, 3*M+M*
00396      $                        ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00397                   MAXWRK = MAX( MAXWRK, BDSPAC )
00398                   MINWRK = MAX( 4*M, BDSPAC )
00399                ELSE IF( WNTVO .AND. WNTUN ) THEN
00400 *
00401 *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
00402 *
00403                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00404                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
00405      $                    N, M, -1 ) )
00406                   WRKBL = MAX( WRKBL, 3*M+2*M*
00407      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00408                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00409      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00410                   WRKBL = MAX( WRKBL, BDSPAC )
00411                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
00412                   MINWRK = MAX( 3*M+N, BDSPAC )
00413                ELSE IF( WNTVO .AND. WNTUAS ) THEN
00414 *
00415 *                 Path 3t(N much larger than M, JOBU='S' or 'A',
00416 *                 JOBVT='O')
00417 *
00418                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00419                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
00420      $                    N, M, -1 ) )
00421                   WRKBL = MAX( WRKBL, 3*M+2*M*
00422      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00423                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00424      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00425                   WRKBL = MAX( WRKBL, 3*M+M*
00426      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00427                   WRKBL = MAX( WRKBL, BDSPAC )
00428                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
00429                   MINWRK = MAX( 3*M+N, BDSPAC )
00430                ELSE IF( WNTVS .AND. WNTUN ) THEN
00431 *
00432 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
00433 *
00434                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00435                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
00436      $                    N, M, -1 ) )
00437                   WRKBL = MAX( WRKBL, 3*M+2*M*
00438      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00439                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00440      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00441                   WRKBL = MAX( WRKBL, BDSPAC )
00442                   MAXWRK = M*M + WRKBL
00443                   MINWRK = MAX( 3*M+N, BDSPAC )
00444                ELSE IF( WNTVS .AND. WNTUO ) THEN
00445 *
00446 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
00447 *
00448                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00449                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
00450      $                    N, M, -1 ) )
00451                   WRKBL = MAX( WRKBL, 3*M+2*M*
00452      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00453                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00454      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00455                   WRKBL = MAX( WRKBL, 3*M+M*
00456      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00457                   WRKBL = MAX( WRKBL, BDSPAC )
00458                   MAXWRK = 2*M*M + WRKBL
00459                   MINWRK = MAX( 3*M+N, BDSPAC )
00460                   MAXWRK = MAX( MAXWRK, MINWRK )
00461                ELSE IF( WNTVS .AND. WNTUAS ) THEN
00462 *
00463 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
00464 *                 JOBVT='S')
00465 *
00466                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00467                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
00468      $                    N, M, -1 ) )
00469                   WRKBL = MAX( WRKBL, 3*M+2*M*
00470      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00471                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00472      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00473                   WRKBL = MAX( WRKBL, 3*M+M*
00474      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00475                   WRKBL = MAX( WRKBL, BDSPAC )
00476                   MAXWRK = M*M + WRKBL
00477                   MINWRK = MAX( 3*M+N, BDSPAC )
00478                ELSE IF( WNTVA .AND. WNTUN ) THEN
00479 *
00480 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
00481 *
00482                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00483                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
00484      $                    N, M, -1 ) )
00485                   WRKBL = MAX( WRKBL, 3*M+2*M*
00486      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00487                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00488      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00489                   WRKBL = MAX( WRKBL, BDSPAC )
00490                   MAXWRK = M*M + WRKBL
00491                   MINWRK = MAX( 3*M+N, BDSPAC )
00492                ELSE IF( WNTVA .AND. WNTUO ) THEN
00493 *
00494 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
00495 *
00496                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00497                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
00498      $                    N, M, -1 ) )
00499                   WRKBL = MAX( WRKBL, 3*M+2*M*
00500      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00501                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00502      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00503                   WRKBL = MAX( WRKBL, 3*M+M*
00504      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00505                   WRKBL = MAX( WRKBL, BDSPAC )
00506                   MAXWRK = 2*M*M + WRKBL
00507                   MINWRK = MAX( 3*M+N, BDSPAC )
00508                ELSE IF( WNTVA .AND. WNTUAS ) THEN
00509 *
00510 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
00511 *                 JOBVT='A')
00512 *
00513                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
00514                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
00515      $                    N, M, -1 ) )
00516                   WRKBL = MAX( WRKBL, 3*M+2*M*
00517      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
00518                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
00519      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
00520                   WRKBL = MAX( WRKBL, 3*M+M*
00521      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00522                   WRKBL = MAX( WRKBL, BDSPAC )
00523                   MAXWRK = M*M + WRKBL
00524                   MINWRK = MAX( 3*M+N, BDSPAC )
00525                END IF
00526             ELSE
00527 *
00528 *              Path 10t(N greater than M, but not much larger)
00529 *
00530                MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
00531      $                  -1, -1 )
00532                IF( WNTVS .OR. WNTVO )
00533      $            MAXWRK = MAX( MAXWRK, 3*M+M*
00534      $                     ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) )
00535                IF( WNTVA )
00536      $            MAXWRK = MAX( MAXWRK, 3*M+N*
00537      $                     ILAENV( 1, 'SORGBR', 'P', N, N, M, -1 ) )
00538                IF( .NOT.WNTUN )
00539      $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
00540      $                     ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
00541                MAXWRK = MAX( MAXWRK, BDSPAC )
00542                MINWRK = MAX( 3*M+N, BDSPAC )
00543             END IF
00544          END IF
00545          MAXWRK = MAX( MAXWRK, MINWRK )
00546          WORK( 1 ) = MAXWRK
00547 *
00548          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
00549             INFO = -13
00550          END IF
00551       END IF
00552 *
00553       IF( INFO.NE.0 ) THEN
00554          CALL XERBLA( 'SGESVD', -INFO )
00555          RETURN
00556       ELSE IF( LQUERY ) THEN
00557          RETURN
00558       END IF
00559 *
00560 *     Quick return if possible
00561 *
00562       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
00563          RETURN
00564       END IF
00565 *
00566 *     Get machine constants
00567 *
00568       EPS = SLAMCH( 'P' )
00569       SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
00570       BIGNUM = ONE / SMLNUM
00571 *
00572 *     Scale A if max element outside range [SMLNUM,BIGNUM]
00573 *
00574       ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
00575       ISCL = 0
00576       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
00577          ISCL = 1
00578          CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
00579       ELSE IF( ANRM.GT.BIGNUM ) THEN
00580          ISCL = 1
00581          CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
00582       END IF
00583 *
00584       IF( M.GE.N ) THEN
00585 *
00586 *        A has at least as many rows as columns. If A has sufficiently
00587 *        more rows than columns, first reduce using the QR
00588 *        decomposition (if sufficient workspace available)
00589 *
00590          IF( M.GE.MNTHR ) THEN
00591 *
00592             IF( WNTUN ) THEN
00593 *
00594 *              Path 1 (M much larger than N, JOBU='N')
00595 *              No left singular vectors to be computed
00596 *
00597                ITAU = 1
00598                IWORK = ITAU + N
00599 *
00600 *              Compute A=Q*R
00601 *              (Workspace: need 2*N, prefer N+N*NB)
00602 *
00603                CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
00604      $                      LWORK-IWORK+1, IERR )
00605 *
00606 *              Zero out below R
00607 *
00608                CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
00609                IE = 1
00610                ITAUQ = IE + N
00611                ITAUP = ITAUQ + N
00612                IWORK = ITAUP + N
00613 *
00614 *              Bidiagonalize R in A
00615 *              (Workspace: need 4*N, prefer 3*N+2*N*NB)
00616 *
00617                CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
00618      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
00619      $                      IERR )
00620                NCVT = 0
00621                IF( WNTVO .OR. WNTVAS ) THEN
00622 *
00623 *                 If right singular vectors desired, generate P'.
00624 *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
00625 *
00626                   CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
00627      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00628                   NCVT = N
00629                END IF
00630                IWORK = IE + N
00631 *
00632 *              Perform bidiagonal QR iteration, computing right
00633 *              singular vectors of A in A if desired
00634 *              (Workspace: need BDSPAC)
00635 *
00636                CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
00637      $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
00638 *
00639 *              If right singular vectors desired in VT, copy them there
00640 *
00641                IF( WNTVAS )
00642      $            CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT )
00643 *
00644             ELSE IF( WNTUO .AND. WNTVN ) THEN
00645 *
00646 *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
00647 *              N left singular vectors to be overwritten on A and
00648 *              no right singular vectors to be computed
00649 *
00650                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
00651 *
00652 *                 Sufficient workspace for a fast algorithm
00653 *
00654                   IR = 1
00655                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
00656 *
00657 *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
00658 *
00659                      LDWRKU = LDA
00660                      LDWRKR = LDA
00661                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
00662 *
00663 *                    WORK(IU) is LDA by N, WORK(IR) is N by N
00664 *
00665                      LDWRKU = LDA
00666                      LDWRKR = N
00667                   ELSE
00668 *
00669 *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
00670 *
00671                      LDWRKU = ( LWORK-N*N-N ) / N
00672                      LDWRKR = N
00673                   END IF
00674                   ITAU = IR + LDWRKR*N
00675                   IWORK = ITAU + N
00676 *
00677 *                 Compute A=Q*R
00678 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00679 *
00680                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00681      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00682 *
00683 *                 Copy R to WORK(IR) and zero out below it
00684 *
00685                   CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
00686                   CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
00687      $                         LDWRKR )
00688 *
00689 *                 Generate Q in A
00690 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00691 *
00692                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00693      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00694                   IE = ITAU
00695                   ITAUQ = IE + N
00696                   ITAUP = ITAUQ + N
00697                   IWORK = ITAUP + N
00698 *
00699 *                 Bidiagonalize R in WORK(IR)
00700 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
00701 *
00702                   CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
00703      $                         WORK( ITAUQ ), WORK( ITAUP ),
00704      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00705 *
00706 *                 Generate left vectors bidiagonalizing R
00707 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
00708 *
00709                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
00710      $                         WORK( ITAUQ ), WORK( IWORK ),
00711      $                         LWORK-IWORK+1, IERR )
00712                   IWORK = IE + N
00713 *
00714 *                 Perform bidiagonal QR iteration, computing left
00715 *                 singular vectors of R in WORK(IR)
00716 *                 (Workspace: need N*N+BDSPAC)
00717 *
00718                   CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
00719      $                         WORK( IR ), LDWRKR, DUM, 1,
00720      $                         WORK( IWORK ), INFO )
00721                   IU = IE + N
00722 *
00723 *                 Multiply Q in A by left singular vectors of R in
00724 *                 WORK(IR), storing result in WORK(IU) and copying to A
00725 *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
00726 *
00727                   DO 10 I = 1, M, LDWRKU
00728                      CHUNK = MIN( M-I+1, LDWRKU )
00729                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
00730      $                           LDA, WORK( IR ), LDWRKR, ZERO,
00731      $                           WORK( IU ), LDWRKU )
00732                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
00733      $                            A( I, 1 ), LDA )
00734    10             CONTINUE
00735 *
00736                ELSE
00737 *
00738 *                 Insufficient workspace for a fast algorithm
00739 *
00740                   IE = 1
00741                   ITAUQ = IE + N
00742                   ITAUP = ITAUQ + N
00743                   IWORK = ITAUP + N
00744 *
00745 *                 Bidiagonalize A
00746 *                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
00747 *
00748                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
00749      $                         WORK( ITAUQ ), WORK( ITAUP ),
00750      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00751 *
00752 *                 Generate left vectors bidiagonalizing A
00753 *                 (Workspace: need 4*N, prefer 3*N+N*NB)
00754 *
00755                   CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
00756      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00757                   IWORK = IE + N
00758 *
00759 *                 Perform bidiagonal QR iteration, computing left
00760 *                 singular vectors of A in A
00761 *                 (Workspace: need BDSPAC)
00762 *
00763                   CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
00764      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
00765 *
00766                END IF
00767 *
00768             ELSE IF( WNTUO .AND. WNTVAS ) THEN
00769 *
00770 *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
00771 *              N left singular vectors to be overwritten on A and
00772 *              N right singular vectors to be computed in VT
00773 *
00774                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
00775 *
00776 *                 Sufficient workspace for a fast algorithm
00777 *
00778                   IR = 1
00779                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
00780 *
00781 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
00782 *
00783                      LDWRKU = LDA
00784                      LDWRKR = LDA
00785                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
00786 *
00787 *                    WORK(IU) is LDA by N and WORK(IR) is N by N
00788 *
00789                      LDWRKU = LDA
00790                      LDWRKR = N
00791                   ELSE
00792 *
00793 *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
00794 *
00795                      LDWRKU = ( LWORK-N*N-N ) / N
00796                      LDWRKR = N
00797                   END IF
00798                   ITAU = IR + LDWRKR*N
00799                   IWORK = ITAU + N
00800 *
00801 *                 Compute A=Q*R
00802 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00803 *
00804                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00805      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00806 *
00807 *                 Copy R to VT, zeroing out below it
00808 *
00809                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
00810                   IF( N.GT.1 )
00811      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
00812      $                            VT( 2, 1 ), LDVT )
00813 *
00814 *                 Generate Q in A
00815 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00816 *
00817                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00818      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00819                   IE = ITAU
00820                   ITAUQ = IE + N
00821                   ITAUP = ITAUQ + N
00822                   IWORK = ITAUP + N
00823 *
00824 *                 Bidiagonalize R in VT, copying result to WORK(IR)
00825 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
00826 *
00827                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
00828      $                         WORK( ITAUQ ), WORK( ITAUP ),
00829      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00830                   CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
00831 *
00832 *                 Generate left vectors bidiagonalizing R in WORK(IR)
00833 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
00834 *
00835                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
00836      $                         WORK( ITAUQ ), WORK( IWORK ),
00837      $                         LWORK-IWORK+1, IERR )
00838 *
00839 *                 Generate right vectors bidiagonalizing R in VT
00840 *                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
00841 *
00842                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
00843      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00844                   IWORK = IE + N
00845 *
00846 *                 Perform bidiagonal QR iteration, computing left
00847 *                 singular vectors of R in WORK(IR) and computing right
00848 *                 singular vectors of R in VT
00849 *                 (Workspace: need N*N+BDSPAC)
00850 *
00851                   CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
00852      $                         WORK( IR ), LDWRKR, DUM, 1,
00853      $                         WORK( IWORK ), INFO )
00854                   IU = IE + N
00855 *
00856 *                 Multiply Q in A by left singular vectors of R in
00857 *                 WORK(IR), storing result in WORK(IU) and copying to A
00858 *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
00859 *
00860                   DO 20 I = 1, M, LDWRKU
00861                      CHUNK = MIN( M-I+1, LDWRKU )
00862                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
00863      $                           LDA, WORK( IR ), LDWRKR, ZERO,
00864      $                           WORK( IU ), LDWRKU )
00865                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
00866      $                            A( I, 1 ), LDA )
00867    20             CONTINUE
00868 *
00869                ELSE
00870 *
00871 *                 Insufficient workspace for a fast algorithm
00872 *
00873                   ITAU = 1
00874                   IWORK = ITAU + N
00875 *
00876 *                 Compute A=Q*R
00877 *                 (Workspace: need 2*N, prefer N+N*NB)
00878 *
00879                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00880      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00881 *
00882 *                 Copy R to VT, zeroing out below it
00883 *
00884                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
00885                   IF( N.GT.1 )
00886      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
00887      $                            VT( 2, 1 ), LDVT )
00888 *
00889 *                 Generate Q in A
00890 *                 (Workspace: need 2*N, prefer N+N*NB)
00891 *
00892                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00893      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00894                   IE = ITAU
00895                   ITAUQ = IE + N
00896                   ITAUP = ITAUQ + N
00897                   IWORK = ITAUP + N
00898 *
00899 *                 Bidiagonalize R in VT
00900 *                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
00901 *
00902                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
00903      $                         WORK( ITAUQ ), WORK( ITAUP ),
00904      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00905 *
00906 *                 Multiply Q in A by left vectors bidiagonalizing R
00907 *                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
00908 *
00909                   CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
00910      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
00911      $                         LWORK-IWORK+1, IERR )
00912 *
00913 *                 Generate right vectors bidiagonalizing R in VT
00914 *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
00915 *
00916                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
00917      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
00918                   IWORK = IE + N
00919 *
00920 *                 Perform bidiagonal QR iteration, computing left
00921 *                 singular vectors of A in A and computing right
00922 *                 singular vectors of A in VT
00923 *                 (Workspace: need BDSPAC)
00924 *
00925                   CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
00926      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
00927 *
00928                END IF
00929 *
00930             ELSE IF( WNTUS ) THEN
00931 *
00932                IF( WNTVN ) THEN
00933 *
00934 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
00935 *                 N left singular vectors to be computed in U and
00936 *                 no right singular vectors to be computed
00937 *
00938                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
00939 *
00940 *                    Sufficient workspace for a fast algorithm
00941 *
00942                      IR = 1
00943                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
00944 *
00945 *                       WORK(IR) is LDA by N
00946 *
00947                         LDWRKR = LDA
00948                      ELSE
00949 *
00950 *                       WORK(IR) is N by N
00951 *
00952                         LDWRKR = N
00953                      END IF
00954                      ITAU = IR + LDWRKR*N
00955                      IWORK = ITAU + N
00956 *
00957 *                    Compute A=Q*R
00958 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00959 *
00960                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
00961      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
00962 *
00963 *                    Copy R to WORK(IR), zeroing out below it
00964 *
00965                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
00966      $                            LDWRKR )
00967                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
00968      $                            WORK( IR+1 ), LDWRKR )
00969 *
00970 *                    Generate Q in A
00971 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
00972 *
00973                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
00974      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
00975                      IE = ITAU
00976                      ITAUQ = IE + N
00977                      ITAUP = ITAUQ + N
00978                      IWORK = ITAUP + N
00979 *
00980 *                    Bidiagonalize R in WORK(IR)
00981 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
00982 *
00983                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
00984      $                            WORK( IE ), WORK( ITAUQ ),
00985      $                            WORK( ITAUP ), WORK( IWORK ),
00986      $                            LWORK-IWORK+1, IERR )
00987 *
00988 *                    Generate left vectors bidiagonalizing R in WORK(IR)
00989 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
00990 *
00991                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
00992      $                            WORK( ITAUQ ), WORK( IWORK ),
00993      $                            LWORK-IWORK+1, IERR )
00994                      IWORK = IE + N
00995 *
00996 *                    Perform bidiagonal QR iteration, computing left
00997 *                    singular vectors of R in WORK(IR)
00998 *                    (Workspace: need N*N+BDSPAC)
00999 *
01000                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
01001      $                            1, WORK( IR ), LDWRKR, DUM, 1,
01002      $                            WORK( IWORK ), INFO )
01003 *
01004 *                    Multiply Q in A by left singular vectors of R in
01005 *                    WORK(IR), storing result in U
01006 *                    (Workspace: need N*N)
01007 *
01008                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
01009      $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
01010 *
01011                   ELSE
01012 *
01013 *                    Insufficient workspace for a fast algorithm
01014 *
01015                      ITAU = 1
01016                      IWORK = ITAU + N
01017 *
01018 *                    Compute A=Q*R, copying result to U
01019 *                    (Workspace: need 2*N, prefer N+N*NB)
01020 *
01021                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01022      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01023                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01024 *
01025 *                    Generate Q in U
01026 *                    (Workspace: need 2*N, prefer N+N*NB)
01027 *
01028                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
01029      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01030                      IE = ITAU
01031                      ITAUQ = IE + N
01032                      ITAUP = ITAUQ + N
01033                      IWORK = ITAUP + N
01034 *
01035 *                    Zero out below R in A
01036 *
01037                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01038      $                            LDA )
01039 *
01040 *                    Bidiagonalize R in A
01041 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01042 *
01043                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01044      $                            WORK( ITAUQ ), WORK( ITAUP ),
01045      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01046 *
01047 *                    Multiply Q in U by left vectors bidiagonalizing R
01048 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01049 *
01050                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01051      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01052      $                            LWORK-IWORK+1, IERR )
01053                      IWORK = IE + N
01054 *
01055 *                    Perform bidiagonal QR iteration, computing left
01056 *                    singular vectors of A in U
01057 *                    (Workspace: need BDSPAC)
01058 *
01059                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
01060      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
01061      $                            INFO )
01062 *
01063                   END IF
01064 *
01065                ELSE IF( WNTVO ) THEN
01066 *
01067 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
01068 *                 N left singular vectors to be computed in U and
01069 *                 N right singular vectors to be overwritten on A
01070 *
01071                   IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
01072 *
01073 *                    Sufficient workspace for a fast algorithm
01074 *
01075                      IU = 1
01076                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
01077 *
01078 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
01079 *
01080                         LDWRKU = LDA
01081                         IR = IU + LDWRKU*N
01082                         LDWRKR = LDA
01083                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
01084 *
01085 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
01086 *
01087                         LDWRKU = LDA
01088                         IR = IU + LDWRKU*N
01089                         LDWRKR = N
01090                      ELSE
01091 *
01092 *                       WORK(IU) is N by N and WORK(IR) is N by N
01093 *
01094                         LDWRKU = N
01095                         IR = IU + LDWRKU*N
01096                         LDWRKR = N
01097                      END IF
01098                      ITAU = IR + LDWRKR*N
01099                      IWORK = ITAU + N
01100 *
01101 *                    Compute A=Q*R
01102 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
01103 *
01104                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01105      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01106 *
01107 *                    Copy R to WORK(IU), zeroing out below it
01108 *
01109                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01110      $                            LDWRKU )
01111                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01112      $                            WORK( IU+1 ), LDWRKU )
01113 *
01114 *                    Generate Q in A
01115 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
01116 *
01117                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
01118      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01119                      IE = ITAU
01120                      ITAUQ = IE + N
01121                      ITAUP = ITAUQ + N
01122                      IWORK = ITAUP + N
01123 *
01124 *                    Bidiagonalize R in WORK(IU), copying result to
01125 *                    WORK(IR)
01126 *                    (Workspace: need 2*N*N+4*N,
01127 *                                prefer 2*N*N+3*N+2*N*NB)
01128 *
01129                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01130      $                            WORK( IE ), WORK( ITAUQ ),
01131      $                            WORK( ITAUP ), WORK( IWORK ),
01132      $                            LWORK-IWORK+1, IERR )
01133                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
01134      $                            WORK( IR ), LDWRKR )
01135 *
01136 *                    Generate left bidiagonalizing vectors in WORK(IU)
01137 *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
01138 *
01139                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01140      $                            WORK( ITAUQ ), WORK( IWORK ),
01141      $                            LWORK-IWORK+1, IERR )
01142 *
01143 *                    Generate right bidiagonalizing vectors in WORK(IR)
01144 *                    (Workspace: need 2*N*N+4*N-1,
01145 *                                prefer 2*N*N+3*N+(N-1)*NB)
01146 *
01147                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
01148      $                            WORK( ITAUP ), WORK( IWORK ),
01149      $                            LWORK-IWORK+1, IERR )
01150                      IWORK = IE + N
01151 *
01152 *                    Perform bidiagonal QR iteration, computing left
01153 *                    singular vectors of R in WORK(IU) and computing
01154 *                    right singular vectors of R in WORK(IR)
01155 *                    (Workspace: need 2*N*N+BDSPAC)
01156 *
01157                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
01158      $                            WORK( IR ), LDWRKR, WORK( IU ),
01159      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
01160 *
01161 *                    Multiply Q in A by left singular vectors of R in
01162 *                    WORK(IU), storing result in U
01163 *                    (Workspace: need N*N)
01164 *
01165                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
01166      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
01167 *
01168 *                    Copy right singular vectors of R to A
01169 *                    (Workspace: need N*N)
01170 *
01171                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
01172      $                            LDA )
01173 *
01174                   ELSE
01175 *
01176 *                    Insufficient workspace for a fast algorithm
01177 *
01178                      ITAU = 1
01179                      IWORK = ITAU + N
01180 *
01181 *                    Compute A=Q*R, copying result to U
01182 *                    (Workspace: need 2*N, prefer N+N*NB)
01183 *
01184                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01185      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01186                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01187 *
01188 *                    Generate Q in U
01189 *                    (Workspace: need 2*N, prefer N+N*NB)
01190 *
01191                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
01192      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01193                      IE = ITAU
01194                      ITAUQ = IE + N
01195                      ITAUP = ITAUQ + N
01196                      IWORK = ITAUP + N
01197 *
01198 *                    Zero out below R in A
01199 *
01200                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01201      $                            LDA )
01202 *
01203 *                    Bidiagonalize R in A
01204 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01205 *
01206                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01207      $                            WORK( ITAUQ ), WORK( ITAUP ),
01208      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01209 *
01210 *                    Multiply Q in U by left vectors bidiagonalizing R
01211 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01212 *
01213                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01214      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01215      $                            LWORK-IWORK+1, IERR )
01216 *
01217 *                    Generate right vectors bidiagonalizing R in A
01218 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01219 *
01220                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
01221      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01222                      IWORK = IE + N
01223 *
01224 *                    Perform bidiagonal QR iteration, computing left
01225 *                    singular vectors of A in U and computing right
01226 *                    singular vectors of A in A
01227 *                    (Workspace: need BDSPAC)
01228 *
01229                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
01230      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
01231      $                            INFO )
01232 *
01233                   END IF
01234 *
01235                ELSE IF( WNTVAS ) THEN
01236 *
01237 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
01238 *                         or 'A')
01239 *                 N left singular vectors to be computed in U and
01240 *                 N right singular vectors to be computed in VT
01241 *
01242                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
01243 *
01244 *                    Sufficient workspace for a fast algorithm
01245 *
01246                      IU = 1
01247                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01248 *
01249 *                       WORK(IU) is LDA by N
01250 *
01251                         LDWRKU = LDA
01252                      ELSE
01253 *
01254 *                       WORK(IU) is N by N
01255 *
01256                         LDWRKU = N
01257                      END IF
01258                      ITAU = IU + LDWRKU*N
01259                      IWORK = ITAU + N
01260 *
01261 *                    Compute A=Q*R
01262 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01263 *
01264                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01265      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01266 *
01267 *                    Copy R to WORK(IU), zeroing out below it
01268 *
01269                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01270      $                            LDWRKU )
01271                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01272      $                            WORK( IU+1 ), LDWRKU )
01273 *
01274 *                    Generate Q in A
01275 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01276 *
01277                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
01278      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01279                      IE = ITAU
01280                      ITAUQ = IE + N
01281                      ITAUP = ITAUQ + N
01282                      IWORK = ITAUP + N
01283 *
01284 *                    Bidiagonalize R in WORK(IU), copying result to VT
01285 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01286 *
01287                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01288      $                            WORK( IE ), WORK( ITAUQ ),
01289      $                            WORK( ITAUP ), WORK( IWORK ),
01290      $                            LWORK-IWORK+1, IERR )
01291                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
01292      $                            LDVT )
01293 *
01294 *                    Generate left bidiagonalizing vectors in WORK(IU)
01295 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01296 *
01297                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01298      $                            WORK( ITAUQ ), WORK( IWORK ),
01299      $                            LWORK-IWORK+1, IERR )
01300 *
01301 *                    Generate right bidiagonalizing vectors in VT
01302 *                    (Workspace: need N*N+4*N-1,
01303 *                                prefer N*N+3*N+(N-1)*NB)
01304 *
01305                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01306      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01307                      IWORK = IE + N
01308 *
01309 *                    Perform bidiagonal QR iteration, computing left
01310 *                    singular vectors of R in WORK(IU) and computing
01311 *                    right singular vectors of R in VT
01312 *                    (Workspace: need N*N+BDSPAC)
01313 *
01314                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
01315      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
01316      $                            WORK( IWORK ), INFO )
01317 *
01318 *                    Multiply Q in A by left singular vectors of R in
01319 *                    WORK(IU), storing result in U
01320 *                    (Workspace: need N*N)
01321 *
01322                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
01323      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
01324 *
01325                   ELSE
01326 *
01327 *                    Insufficient workspace for a fast algorithm
01328 *
01329                      ITAU = 1
01330                      IWORK = ITAU + N
01331 *
01332 *                    Compute A=Q*R, copying result to U
01333 *                    (Workspace: need 2*N, prefer N+N*NB)
01334 *
01335                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01336      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01337                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01338 *
01339 *                    Generate Q in U
01340 *                    (Workspace: need 2*N, prefer N+N*NB)
01341 *
01342                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
01343      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01344 *
01345 *                    Copy R to VT, zeroing out below it
01346 *
01347                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
01348                      IF( N.GT.1 )
01349      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01350      $                               VT( 2, 1 ), LDVT )
01351                      IE = ITAU
01352                      ITAUQ = IE + N
01353                      ITAUP = ITAUQ + N
01354                      IWORK = ITAUP + N
01355 *
01356 *                    Bidiagonalize R in VT
01357 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01358 *
01359                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
01360      $                            WORK( ITAUQ ), WORK( ITAUP ),
01361      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01362 *
01363 *                    Multiply Q in U by left bidiagonalizing vectors
01364 *                    in VT
01365 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01366 *
01367                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
01368      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01369      $                            LWORK-IWORK+1, IERR )
01370 *
01371 *                    Generate right bidiagonalizing vectors in VT
01372 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01373 *
01374                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01375      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01376                      IWORK = IE + N
01377 *
01378 *                    Perform bidiagonal QR iteration, computing left
01379 *                    singular vectors of A in U and computing right
01380 *                    singular vectors of A in VT
01381 *                    (Workspace: need BDSPAC)
01382 *
01383                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
01384      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
01385      $                            INFO )
01386 *
01387                   END IF
01388 *
01389                END IF
01390 *
01391             ELSE IF( WNTUA ) THEN
01392 *
01393                IF( WNTVN ) THEN
01394 *
01395 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
01396 *                 M left singular vectors to be computed in U and
01397 *                 no right singular vectors to be computed
01398 *
01399                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
01400 *
01401 *                    Sufficient workspace for a fast algorithm
01402 *
01403                      IR = 1
01404                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01405 *
01406 *                       WORK(IR) is LDA by N
01407 *
01408                         LDWRKR = LDA
01409                      ELSE
01410 *
01411 *                       WORK(IR) is N by N
01412 *
01413                         LDWRKR = N
01414                      END IF
01415                      ITAU = IR + LDWRKR*N
01416                      IWORK = ITAU + N
01417 *
01418 *                    Compute A=Q*R, copying result to U
01419 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01420 *
01421                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01422      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01423                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01424 *
01425 *                    Copy R to WORK(IR), zeroing out below it
01426 *
01427                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
01428      $                            LDWRKR )
01429                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01430      $                            WORK( IR+1 ), LDWRKR )
01431 *
01432 *                    Generate Q in U
01433 *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
01434 *
01435                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01436      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01437                      IE = ITAU
01438                      ITAUQ = IE + N
01439                      ITAUP = ITAUQ + N
01440                      IWORK = ITAUP + N
01441 *
01442 *                    Bidiagonalize R in WORK(IR)
01443 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01444 *
01445                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
01446      $                            WORK( IE ), WORK( ITAUQ ),
01447      $                            WORK( ITAUP ), WORK( IWORK ),
01448      $                            LWORK-IWORK+1, IERR )
01449 *
01450 *                    Generate left bidiagonalizing vectors in WORK(IR)
01451 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01452 *
01453                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
01454      $                            WORK( ITAUQ ), WORK( IWORK ),
01455      $                            LWORK-IWORK+1, IERR )
01456                      IWORK = IE + N
01457 *
01458 *                    Perform bidiagonal QR iteration, computing left
01459 *                    singular vectors of R in WORK(IR)
01460 *                    (Workspace: need N*N+BDSPAC)
01461 *
01462                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
01463      $                            1, WORK( IR ), LDWRKR, DUM, 1,
01464      $                            WORK( IWORK ), INFO )
01465 *
01466 *                    Multiply Q in U by left singular vectors of R in
01467 *                    WORK(IR), storing result in A
01468 *                    (Workspace: need N*N)
01469 *
01470                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
01471      $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
01472 *
01473 *                    Copy left singular vectors of A from A to U
01474 *
01475                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
01476 *
01477                   ELSE
01478 *
01479 *                    Insufficient workspace for a fast algorithm
01480 *
01481                      ITAU = 1
01482                      IWORK = ITAU + N
01483 *
01484 *                    Compute A=Q*R, copying result to U
01485 *                    (Workspace: need 2*N, prefer N+N*NB)
01486 *
01487                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01488      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01489                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01490 *
01491 *                    Generate Q in U
01492 *                    (Workspace: need N+M, prefer N+M*NB)
01493 *
01494                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01495      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01496                      IE = ITAU
01497                      ITAUQ = IE + N
01498                      ITAUP = ITAUQ + N
01499                      IWORK = ITAUP + N
01500 *
01501 *                    Zero out below R in A
01502 *
01503                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01504      $                            LDA )
01505 *
01506 *                    Bidiagonalize R in A
01507 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01508 *
01509                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01510      $                            WORK( ITAUQ ), WORK( ITAUP ),
01511      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01512 *
01513 *                    Multiply Q in U by left bidiagonalizing vectors
01514 *                    in A
01515 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01516 *
01517                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01518      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01519      $                            LWORK-IWORK+1, IERR )
01520                      IWORK = IE + N
01521 *
01522 *                    Perform bidiagonal QR iteration, computing left
01523 *                    singular vectors of A in U
01524 *                    (Workspace: need BDSPAC)
01525 *
01526                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
01527      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
01528      $                            INFO )
01529 *
01530                   END IF
01531 *
01532                ELSE IF( WNTVO ) THEN
01533 *
01534 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
01535 *                 M left singular vectors to be computed in U and
01536 *                 N right singular vectors to be overwritten on A
01537 *
01538                   IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
01539 *
01540 *                    Sufficient workspace for a fast algorithm
01541 *
01542                      IU = 1
01543                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
01544 *
01545 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
01546 *
01547                         LDWRKU = LDA
01548                         IR = IU + LDWRKU*N
01549                         LDWRKR = LDA
01550                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
01551 *
01552 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
01553 *
01554                         LDWRKU = LDA
01555                         IR = IU + LDWRKU*N
01556                         LDWRKR = N
01557                      ELSE
01558 *
01559 *                       WORK(IU) is N by N and WORK(IR) is N by N
01560 *
01561                         LDWRKU = N
01562                         IR = IU + LDWRKU*N
01563                         LDWRKR = N
01564                      END IF
01565                      ITAU = IR + LDWRKR*N
01566                      IWORK = ITAU + N
01567 *
01568 *                    Compute A=Q*R, copying result to U
01569 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
01570 *
01571                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01572      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01573                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01574 *
01575 *                    Generate Q in U
01576 *                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
01577 *
01578                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01579      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01580 *
01581 *                    Copy R to WORK(IU), zeroing out below it
01582 *
01583                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01584      $                            LDWRKU )
01585                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01586      $                            WORK( IU+1 ), LDWRKU )
01587                      IE = ITAU
01588                      ITAUQ = IE + N
01589                      ITAUP = ITAUQ + N
01590                      IWORK = ITAUP + N
01591 *
01592 *                    Bidiagonalize R in WORK(IU), copying result to
01593 *                    WORK(IR)
01594 *                    (Workspace: need 2*N*N+4*N,
01595 *                                prefer 2*N*N+3*N+2*N*NB)
01596 *
01597                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01598      $                            WORK( IE ), WORK( ITAUQ ),
01599      $                            WORK( ITAUP ), WORK( IWORK ),
01600      $                            LWORK-IWORK+1, IERR )
01601                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
01602      $                            WORK( IR ), LDWRKR )
01603 *
01604 *                    Generate left bidiagonalizing vectors in WORK(IU)
01605 *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
01606 *
01607                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01608      $                            WORK( ITAUQ ), WORK( IWORK ),
01609      $                            LWORK-IWORK+1, IERR )
01610 *
01611 *                    Generate right bidiagonalizing vectors in WORK(IR)
01612 *                    (Workspace: need 2*N*N+4*N-1,
01613 *                                prefer 2*N*N+3*N+(N-1)*NB)
01614 *
01615                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
01616      $                            WORK( ITAUP ), WORK( IWORK ),
01617      $                            LWORK-IWORK+1, IERR )
01618                      IWORK = IE + N
01619 *
01620 *                    Perform bidiagonal QR iteration, computing left
01621 *                    singular vectors of R in WORK(IU) and computing
01622 *                    right singular vectors of R in WORK(IR)
01623 *                    (Workspace: need 2*N*N+BDSPAC)
01624 *
01625                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
01626      $                            WORK( IR ), LDWRKR, WORK( IU ),
01627      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
01628 *
01629 *                    Multiply Q in U by left singular vectors of R in
01630 *                    WORK(IU), storing result in A
01631 *                    (Workspace: need N*N)
01632 *
01633                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
01634      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
01635 *
01636 *                    Copy left singular vectors of A from A to U
01637 *
01638                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
01639 *
01640 *                    Copy right singular vectors of R from WORK(IR) to A
01641 *
01642                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
01643      $                            LDA )
01644 *
01645                   ELSE
01646 *
01647 *                    Insufficient workspace for a fast algorithm
01648 *
01649                      ITAU = 1
01650                      IWORK = ITAU + N
01651 *
01652 *                    Compute A=Q*R, copying result to U
01653 *                    (Workspace: need 2*N, prefer N+N*NB)
01654 *
01655                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01656      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01657                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01658 *
01659 *                    Generate Q in U
01660 *                    (Workspace: need N+M, prefer N+M*NB)
01661 *
01662                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01663      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01664                      IE = ITAU
01665                      ITAUQ = IE + N
01666                      ITAUP = ITAUQ + N
01667                      IWORK = ITAUP + N
01668 *
01669 *                    Zero out below R in A
01670 *
01671                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
01672      $                            LDA )
01673 *
01674 *                    Bidiagonalize R in A
01675 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01676 *
01677                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
01678      $                            WORK( ITAUQ ), WORK( ITAUP ),
01679      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01680 *
01681 *                    Multiply Q in U by left bidiagonalizing vectors
01682 *                    in A
01683 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01684 *
01685                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
01686      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01687      $                            LWORK-IWORK+1, IERR )
01688 *
01689 *                    Generate right bidiagonalizing vectors in A
01690 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01691 *
01692                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
01693      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01694                      IWORK = IE + N
01695 *
01696 *                    Perform bidiagonal QR iteration, computing left
01697 *                    singular vectors of A in U and computing right
01698 *                    singular vectors of A in A
01699 *                    (Workspace: need BDSPAC)
01700 *
01701                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
01702      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
01703      $                            INFO )
01704 *
01705                   END IF
01706 *
01707                ELSE IF( WNTVAS ) THEN
01708 *
01709 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
01710 *                         or 'A')
01711 *                 M left singular vectors to be computed in U and
01712 *                 N right singular vectors to be computed in VT
01713 *
01714                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
01715 *
01716 *                    Sufficient workspace for a fast algorithm
01717 *
01718                      IU = 1
01719                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
01720 *
01721 *                       WORK(IU) is LDA by N
01722 *
01723                         LDWRKU = LDA
01724                      ELSE
01725 *
01726 *                       WORK(IU) is N by N
01727 *
01728                         LDWRKU = N
01729                      END IF
01730                      ITAU = IU + LDWRKU*N
01731                      IWORK = ITAU + N
01732 *
01733 *                    Compute A=Q*R, copying result to U
01734 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
01735 *
01736                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01737      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01738                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01739 *
01740 *                    Generate Q in U
01741 *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
01742 *
01743                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01744      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01745 *
01746 *                    Copy R to WORK(IU), zeroing out below it
01747 *
01748                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
01749      $                            LDWRKU )
01750                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01751      $                            WORK( IU+1 ), LDWRKU )
01752                      IE = ITAU
01753                      ITAUQ = IE + N
01754                      ITAUP = ITAUQ + N
01755                      IWORK = ITAUP + N
01756 *
01757 *                    Bidiagonalize R in WORK(IU), copying result to VT
01758 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
01759 *
01760                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
01761      $                            WORK( IE ), WORK( ITAUQ ),
01762      $                            WORK( ITAUP ), WORK( IWORK ),
01763      $                            LWORK-IWORK+1, IERR )
01764                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
01765      $                            LDVT )
01766 *
01767 *                    Generate left bidiagonalizing vectors in WORK(IU)
01768 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
01769 *
01770                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
01771      $                            WORK( ITAUQ ), WORK( IWORK ),
01772      $                            LWORK-IWORK+1, IERR )
01773 *
01774 *                    Generate right bidiagonalizing vectors in VT
01775 *                    (Workspace: need N*N+4*N-1,
01776 *                                prefer N*N+3*N+(N-1)*NB)
01777 *
01778                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01779      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01780                      IWORK = IE + N
01781 *
01782 *                    Perform bidiagonal QR iteration, computing left
01783 *                    singular vectors of R in WORK(IU) and computing
01784 *                    right singular vectors of R in VT
01785 *                    (Workspace: need N*N+BDSPAC)
01786 *
01787                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
01788      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
01789      $                            WORK( IWORK ), INFO )
01790 *
01791 *                    Multiply Q in U by left singular vectors of R in
01792 *                    WORK(IU), storing result in A
01793 *                    (Workspace: need N*N)
01794 *
01795                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
01796      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
01797 *
01798 *                    Copy left singular vectors of A from A to U
01799 *
01800                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
01801 *
01802                   ELSE
01803 *
01804 *                    Insufficient workspace for a fast algorithm
01805 *
01806                      ITAU = 1
01807                      IWORK = ITAU + N
01808 *
01809 *                    Compute A=Q*R, copying result to U
01810 *                    (Workspace: need 2*N, prefer N+N*NB)
01811 *
01812                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
01813      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01814                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01815 *
01816 *                    Generate Q in U
01817 *                    (Workspace: need N+M, prefer N+M*NB)
01818 *
01819                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
01820      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01821 *
01822 *                    Copy R from A to VT, zeroing out below it
01823 *
01824                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
01825                      IF( N.GT.1 )
01826      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
01827      $                               VT( 2, 1 ), LDVT )
01828                      IE = ITAU
01829                      ITAUQ = IE + N
01830                      ITAUP = ITAUQ + N
01831                      IWORK = ITAUP + N
01832 *
01833 *                    Bidiagonalize R in VT
01834 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
01835 *
01836                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
01837      $                            WORK( ITAUQ ), WORK( ITAUP ),
01838      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01839 *
01840 *                    Multiply Q in U by left bidiagonalizing vectors
01841 *                    in VT
01842 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
01843 *
01844                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
01845      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
01846      $                            LWORK-IWORK+1, IERR )
01847 *
01848 *                    Generate right bidiagonalizing vectors in VT
01849 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01850 *
01851                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01852      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
01853                      IWORK = IE + N
01854 *
01855 *                    Perform bidiagonal QR iteration, computing left
01856 *                    singular vectors of A in U and computing right
01857 *                    singular vectors of A in VT
01858 *                    (Workspace: need BDSPAC)
01859 *
01860                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
01861      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
01862      $                            INFO )
01863 *
01864                   END IF
01865 *
01866                END IF
01867 *
01868             END IF
01869 *
01870          ELSE
01871 *
01872 *           M .LT. MNTHR
01873 *
01874 *           Path 10 (M at least N, but not much larger)
01875 *           Reduce to bidiagonal form without QR decomposition
01876 *
01877             IE = 1
01878             ITAUQ = IE + N
01879             ITAUP = ITAUQ + N
01880             IWORK = ITAUP + N
01881 *
01882 *           Bidiagonalize A
01883 *           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
01884 *
01885             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
01886      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
01887      $                   IERR )
01888             IF( WNTUAS ) THEN
01889 *
01890 *              If left singular vectors desired in U, copy result to U
01891 *              and generate left bidiagonalizing vectors in U
01892 *              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
01893 *
01894                CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
01895                IF( WNTUS )
01896      $            NCU = N
01897                IF( WNTUA )
01898      $            NCU = M
01899                CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
01900      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
01901             END IF
01902             IF( WNTVAS ) THEN
01903 *
01904 *              If right singular vectors desired in VT, copy result to
01905 *              VT and generate right bidiagonalizing vectors in VT
01906 *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01907 *
01908                CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
01909                CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
01910      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
01911             END IF
01912             IF( WNTUO ) THEN
01913 *
01914 *              If left singular vectors desired in A, generate left
01915 *              bidiagonalizing vectors in A
01916 *              (Workspace: need 4*N, prefer 3*N+N*NB)
01917 *
01918                CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
01919      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
01920             END IF
01921             IF( WNTVO ) THEN
01922 *
01923 *              If right singular vectors desired in A, generate right
01924 *              bidiagonalizing vectors in A
01925 *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
01926 *
01927                CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
01928      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
01929             END IF
01930             IWORK = IE + N
01931             IF( WNTUAS .OR. WNTUO )
01932      $         NRU = M
01933             IF( WNTUN )
01934      $         NRU = 0
01935             IF( WNTVAS .OR. WNTVO )
01936      $         NCVT = N
01937             IF( WNTVN )
01938      $         NCVT = 0
01939             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
01940 *
01941 *              Perform bidiagonal QR iteration, if desired, computing
01942 *              left singular vectors in U and computing right singular
01943 *              vectors in VT
01944 *              (Workspace: need BDSPAC)
01945 *
01946                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
01947      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
01948             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
01949 *
01950 *              Perform bidiagonal QR iteration, if desired, computing
01951 *              left singular vectors in U and computing right singular
01952 *              vectors in A
01953 *              (Workspace: need BDSPAC)
01954 *
01955                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
01956      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
01957             ELSE
01958 *
01959 *              Perform bidiagonal QR iteration, if desired, computing
01960 *              left singular vectors in A and computing right singular
01961 *              vectors in VT
01962 *              (Workspace: need BDSPAC)
01963 *
01964                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
01965      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
01966             END IF
01967 *
01968          END IF
01969 *
01970       ELSE
01971 *
01972 *        A has more columns than rows. If A has sufficiently more
01973 *        columns than rows, first reduce using the LQ decomposition (if
01974 *        sufficient workspace available)
01975 *
01976          IF( N.GE.MNTHR ) THEN
01977 *
01978             IF( WNTVN ) THEN
01979 *
01980 *              Path 1t(N much larger than M, JOBVT='N')
01981 *              No right singular vectors to be computed
01982 *
01983                ITAU = 1
01984                IWORK = ITAU + M
01985 *
01986 *              Compute A=L*Q
01987 *              (Workspace: need 2*M, prefer M+M*NB)
01988 *
01989                CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
01990      $                      LWORK-IWORK+1, IERR )
01991 *
01992 *              Zero out above L
01993 *
01994                CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
01995                IE = 1
01996                ITAUQ = IE + M
01997                ITAUP = ITAUQ + M
01998                IWORK = ITAUP + M
01999 *
02000 *              Bidiagonalize L in A
02001 *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
02002 *
02003                CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
02004      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
02005      $                      IERR )
02006                IF( WNTUO .OR. WNTUAS ) THEN
02007 *
02008 *                 If left singular vectors desired, generate Q
02009 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
02010 *
02011                   CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
02012      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02013                END IF
02014                IWORK = IE + M
02015                NRU = 0
02016                IF( WNTUO .OR. WNTUAS )
02017      $            NRU = M
02018 *
02019 *              Perform bidiagonal QR iteration, computing left singular
02020 *              vectors of A in A if desired
02021 *              (Workspace: need BDSPAC)
02022 *
02023                CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
02024      $                      LDA, DUM, 1, WORK( IWORK ), INFO )
02025 *
02026 *              If left singular vectors desired in U, copy them there
02027 *
02028                IF( WNTUAS )
02029      $            CALL SLACPY( 'F', M, M, A, LDA, U, LDU )
02030 *
02031             ELSE IF( WNTVO .AND. WNTUN ) THEN
02032 *
02033 *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
02034 *              M right singular vectors to be overwritten on A and
02035 *              no left singular vectors to be computed
02036 *
02037                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02038 *
02039 *                 Sufficient workspace for a fast algorithm
02040 *
02041                   IR = 1
02042                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
02043 *
02044 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
02045 *
02046                      LDWRKU = LDA
02047                      CHUNK = N
02048                      LDWRKR = LDA
02049                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
02050 *
02051 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
02052 *
02053                      LDWRKU = LDA
02054                      CHUNK = N
02055                      LDWRKR = M
02056                   ELSE
02057 *
02058 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
02059 *
02060                      LDWRKU = M
02061                      CHUNK = ( LWORK-M*M-M ) / M
02062                      LDWRKR = M
02063                   END IF
02064                   ITAU = IR + LDWRKR*M
02065                   IWORK = ITAU + M
02066 *
02067 *                 Compute A=L*Q
02068 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02069 *
02070                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02071      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02072 *
02073 *                 Copy L to WORK(IR) and zero out above it
02074 *
02075                   CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
02076                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02077      $                         WORK( IR+LDWRKR ), LDWRKR )
02078 *
02079 *                 Generate Q in A
02080 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02081 *
02082                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02083      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02084                   IE = ITAU
02085                   ITAUQ = IE + M
02086                   ITAUP = ITAUQ + M
02087                   IWORK = ITAUP + M
02088 *
02089 *                 Bidiagonalize L in WORK(IR)
02090 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02091 *
02092                   CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
02093      $                         WORK( ITAUQ ), WORK( ITAUP ),
02094      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02095 *
02096 *                 Generate right vectors bidiagonalizing L
02097 *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
02098 *
02099                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02100      $                         WORK( ITAUP ), WORK( IWORK ),
02101      $                         LWORK-IWORK+1, IERR )
02102                   IWORK = IE + M
02103 *
02104 *                 Perform bidiagonal QR iteration, computing right
02105 *                 singular vectors of L in WORK(IR)
02106 *                 (Workspace: need M*M+BDSPAC)
02107 *
02108                   CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
02109      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
02110      $                         WORK( IWORK ), INFO )
02111                   IU = IE + M
02112 *
02113 *                 Multiply right singular vectors of L in WORK(IR) by Q
02114 *                 in A, storing result in WORK(IU) and copying to A
02115 *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
02116 *
02117                   DO 30 I = 1, N, CHUNK
02118                      BLK = MIN( N-I+1, CHUNK )
02119                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
02120      $                           LDWRKR, A( 1, I ), LDA, ZERO,
02121      $                           WORK( IU ), LDWRKU )
02122                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
02123      $                            A( 1, I ), LDA )
02124    30             CONTINUE
02125 *
02126                ELSE
02127 *
02128 *                 Insufficient workspace for a fast algorithm
02129 *
02130                   IE = 1
02131                   ITAUQ = IE + M
02132                   ITAUP = ITAUQ + M
02133                   IWORK = ITAUP + M
02134 *
02135 *                 Bidiagonalize A
02136 *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
02137 *
02138                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
02139      $                         WORK( ITAUQ ), WORK( ITAUP ),
02140      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02141 *
02142 *                 Generate right vectors bidiagonalizing A
02143 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
02144 *
02145                   CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
02146      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02147                   IWORK = IE + M
02148 *
02149 *                 Perform bidiagonal QR iteration, computing right
02150 *                 singular vectors of A in A
02151 *                 (Workspace: need BDSPAC)
02152 *
02153                   CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
02154      $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
02155 *
02156                END IF
02157 *
02158             ELSE IF( WNTVO .AND. WNTUAS ) THEN
02159 *
02160 *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
02161 *              M right singular vectors to be overwritten on A and
02162 *              M left singular vectors to be computed in U
02163 *
02164                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02165 *
02166 *                 Sufficient workspace for a fast algorithm
02167 *
02168                   IR = 1
02169                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
02170 *
02171 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
02172 *
02173                      LDWRKU = LDA
02174                      CHUNK = N
02175                      LDWRKR = LDA
02176                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
02177 *
02178 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
02179 *
02180                      LDWRKU = LDA
02181                      CHUNK = N
02182                      LDWRKR = M
02183                   ELSE
02184 *
02185 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
02186 *
02187                      LDWRKU = M
02188                      CHUNK = ( LWORK-M*M-M ) / M
02189                      LDWRKR = M
02190                   END IF
02191                   ITAU = IR + LDWRKR*M
02192                   IWORK = ITAU + M
02193 *
02194 *                 Compute A=L*Q
02195 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02196 *
02197                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02198      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02199 *
02200 *                 Copy L to U, zeroing about above it
02201 *
02202                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
02203                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
02204      $                         LDU )
02205 *
02206 *                 Generate Q in A
02207 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02208 *
02209                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02210      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02211                   IE = ITAU
02212                   ITAUQ = IE + M
02213                   ITAUP = ITAUQ + M
02214                   IWORK = ITAUP + M
02215 *
02216 *                 Bidiagonalize L in U, copying result to WORK(IR)
02217 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02218 *
02219                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
02220      $                         WORK( ITAUQ ), WORK( ITAUP ),
02221      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02222                   CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
02223 *
02224 *                 Generate right vectors bidiagonalizing L in WORK(IR)
02225 *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
02226 *
02227                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02228      $                         WORK( ITAUP ), WORK( IWORK ),
02229      $                         LWORK-IWORK+1, IERR )
02230 *
02231 *                 Generate left vectors bidiagonalizing L in U
02232 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
02233 *
02234                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02235      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02236                   IWORK = IE + M
02237 *
02238 *                 Perform bidiagonal QR iteration, computing left
02239 *                 singular vectors of L in U, and computing right
02240 *                 singular vectors of L in WORK(IR)
02241 *                 (Workspace: need M*M+BDSPAC)
02242 *
02243                   CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
02244      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
02245      $                         WORK( IWORK ), INFO )
02246                   IU = IE + M
02247 *
02248 *                 Multiply right singular vectors of L in WORK(IR) by Q
02249 *                 in A, storing result in WORK(IU) and copying to A
02250 *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
02251 *
02252                   DO 40 I = 1, N, CHUNK
02253                      BLK = MIN( N-I+1, CHUNK )
02254                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
02255      $                           LDWRKR, A( 1, I ), LDA, ZERO,
02256      $                           WORK( IU ), LDWRKU )
02257                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
02258      $                            A( 1, I ), LDA )
02259    40             CONTINUE
02260 *
02261                ELSE
02262 *
02263 *                 Insufficient workspace for a fast algorithm
02264 *
02265                   ITAU = 1
02266                   IWORK = ITAU + M
02267 *
02268 *                 Compute A=L*Q
02269 *                 (Workspace: need 2*M, prefer M+M*NB)
02270 *
02271                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02272      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02273 *
02274 *                 Copy L to U, zeroing out above it
02275 *
02276                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
02277                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
02278      $                         LDU )
02279 *
02280 *                 Generate Q in A
02281 *                 (Workspace: need 2*M, prefer M+M*NB)
02282 *
02283                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02284      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02285                   IE = ITAU
02286                   ITAUQ = IE + M
02287                   ITAUP = ITAUQ + M
02288                   IWORK = ITAUP + M
02289 *
02290 *                 Bidiagonalize L in U
02291 *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
02292 *
02293                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
02294      $                         WORK( ITAUQ ), WORK( ITAUP ),
02295      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02296 *
02297 *                 Multiply right vectors bidiagonalizing L by Q in A
02298 *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
02299 *
02300                   CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
02301      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
02302      $                         LWORK-IWORK+1, IERR )
02303 *
02304 *                 Generate left vectors bidiagonalizing L in U
02305 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
02306 *
02307                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02308      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
02309                   IWORK = IE + M
02310 *
02311 *                 Perform bidiagonal QR iteration, computing left
02312 *                 singular vectors of A in U and computing right
02313 *                 singular vectors of A in A
02314 *                 (Workspace: need BDSPAC)
02315 *
02316                   CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
02317      $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
02318 *
02319                END IF
02320 *
02321             ELSE IF( WNTVS ) THEN
02322 *
02323                IF( WNTUN ) THEN
02324 *
02325 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
02326 *                 M right singular vectors to be computed in VT and
02327 *                 no left singular vectors to be computed
02328 *
02329                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02330 *
02331 *                    Sufficient workspace for a fast algorithm
02332 *
02333                      IR = 1
02334                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
02335 *
02336 *                       WORK(IR) is LDA by M
02337 *
02338                         LDWRKR = LDA
02339                      ELSE
02340 *
02341 *                       WORK(IR) is M by M
02342 *
02343                         LDWRKR = M
02344                      END IF
02345                      ITAU = IR + LDWRKR*M
02346                      IWORK = ITAU + M
02347 *
02348 *                    Compute A=L*Q
02349 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02350 *
02351                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02352      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02353 *
02354 *                    Copy L to WORK(IR), zeroing out above it
02355 *
02356                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
02357      $                            LDWRKR )
02358                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02359      $                            WORK( IR+LDWRKR ), LDWRKR )
02360 *
02361 *                    Generate Q in A
02362 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02363 *
02364                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02365      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02366                      IE = ITAU
02367                      ITAUQ = IE + M
02368                      ITAUP = ITAUQ + M
02369                      IWORK = ITAUP + M
02370 *
02371 *                    Bidiagonalize L in WORK(IR)
02372 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02373 *
02374                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
02375      $                            WORK( IE ), WORK( ITAUQ ),
02376      $                            WORK( ITAUP ), WORK( IWORK ),
02377      $                            LWORK-IWORK+1, IERR )
02378 *
02379 *                    Generate right vectors bidiagonalizing L in
02380 *                    WORK(IR)
02381 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
02382 *
02383                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02384      $                            WORK( ITAUP ), WORK( IWORK ),
02385      $                            LWORK-IWORK+1, IERR )
02386                      IWORK = IE + M
02387 *
02388 *                    Perform bidiagonal QR iteration, computing right
02389 *                    singular vectors of L in WORK(IR)
02390 *                    (Workspace: need M*M+BDSPAC)
02391 *
02392                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
02393      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
02394      $                            WORK( IWORK ), INFO )
02395 *
02396 *                    Multiply right singular vectors of L in WORK(IR) by
02397 *                    Q in A, storing result in VT
02398 *                    (Workspace: need M*M)
02399 *
02400                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
02401      $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
02402 *
02403                   ELSE
02404 *
02405 *                    Insufficient workspace for a fast algorithm
02406 *
02407                      ITAU = 1
02408                      IWORK = ITAU + M
02409 *
02410 *                    Compute A=L*Q
02411 *                    (Workspace: need 2*M, prefer M+M*NB)
02412 *
02413                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02414      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02415 *
02416 *                    Copy result to VT
02417 *
02418                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02419 *
02420 *                    Generate Q in VT
02421 *                    (Workspace: need 2*M, prefer M+M*NB)
02422 *
02423                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
02424      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02425                      IE = ITAU
02426                      ITAUQ = IE + M
02427                      ITAUP = ITAUQ + M
02428                      IWORK = ITAUP + M
02429 *
02430 *                    Zero out above L in A
02431 *
02432                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
02433      $                            LDA )
02434 *
02435 *                    Bidiagonalize L in A
02436 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02437 *
02438                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
02439      $                            WORK( ITAUQ ), WORK( ITAUP ),
02440      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02441 *
02442 *                    Multiply right vectors bidiagonalizing L by Q in VT
02443 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02444 *
02445                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
02446      $                            WORK( ITAUP ), VT, LDVT,
02447      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02448                      IWORK = IE + M
02449 *
02450 *                    Perform bidiagonal QR iteration, computing right
02451 *                    singular vectors of A in VT
02452 *                    (Workspace: need BDSPAC)
02453 *
02454                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
02455      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
02456      $                            INFO )
02457 *
02458                   END IF
02459 *
02460                ELSE IF( WNTUO ) THEN
02461 *
02462 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
02463 *                 M right singular vectors to be computed in VT and
02464 *                 M left singular vectors to be overwritten on A
02465 *
02466                   IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
02467 *
02468 *                    Sufficient workspace for a fast algorithm
02469 *
02470                      IU = 1
02471                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
02472 *
02473 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
02474 *
02475                         LDWRKU = LDA
02476                         IR = IU + LDWRKU*M
02477                         LDWRKR = LDA
02478                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
02479 *
02480 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
02481 *
02482                         LDWRKU = LDA
02483                         IR = IU + LDWRKU*M
02484                         LDWRKR = M
02485                      ELSE
02486 *
02487 *                       WORK(IU) is M by M and WORK(IR) is M by M
02488 *
02489                         LDWRKU = M
02490                         IR = IU + LDWRKU*M
02491                         LDWRKR = M
02492                      END IF
02493                      ITAU = IR + LDWRKR*M
02494                      IWORK = ITAU + M
02495 *
02496 *                    Compute A=L*Q
02497 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
02498 *
02499                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02500      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02501 *
02502 *                    Copy L to WORK(IU), zeroing out below it
02503 *
02504                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
02505      $                            LDWRKU )
02506                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02507      $                            WORK( IU+LDWRKU ), LDWRKU )
02508 *
02509 *                    Generate Q in A
02510 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
02511 *
02512                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02513      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02514                      IE = ITAU
02515                      ITAUQ = IE + M
02516                      ITAUP = ITAUQ + M
02517                      IWORK = ITAUP + M
02518 *
02519 *                    Bidiagonalize L in WORK(IU), copying result to
02520 *                    WORK(IR)
02521 *                    (Workspace: need 2*M*M+4*M,
02522 *                                prefer 2*M*M+3*M+2*M*NB)
02523 *
02524                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
02525      $                            WORK( IE ), WORK( ITAUQ ),
02526      $                            WORK( ITAUP ), WORK( IWORK ),
02527      $                            LWORK-IWORK+1, IERR )
02528                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
02529      $                            WORK( IR ), LDWRKR )
02530 *
02531 *                    Generate right bidiagonalizing vectors in WORK(IU)
02532 *                    (Workspace: need 2*M*M+4*M-1,
02533 *                                prefer 2*M*M+3*M+(M-1)*NB)
02534 *
02535                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
02536      $                            WORK( ITAUP ), WORK( IWORK ),
02537      $                            LWORK-IWORK+1, IERR )
02538 *
02539 *                    Generate left bidiagonalizing vectors in WORK(IR)
02540 *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
02541 *
02542                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
02543      $                            WORK( ITAUQ ), WORK( IWORK ),
02544      $                            LWORK-IWORK+1, IERR )
02545                      IWORK = IE + M
02546 *
02547 *                    Perform bidiagonal QR iteration, computing left
02548 *                    singular vectors of L in WORK(IR) and computing
02549 *                    right singular vectors of L in WORK(IU)
02550 *                    (Workspace: need 2*M*M+BDSPAC)
02551 *
02552                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
02553      $                            WORK( IU ), LDWRKU, WORK( IR ),
02554      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
02555 *
02556 *                    Multiply right singular vectors of L in WORK(IU) by
02557 *                    Q in A, storing result in VT
02558 *                    (Workspace: need M*M)
02559 *
02560                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
02561      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
02562 *
02563 *                    Copy left singular vectors of L to A
02564 *                    (Workspace: need M*M)
02565 *
02566                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
02567      $                            LDA )
02568 *
02569                   ELSE
02570 *
02571 *                    Insufficient workspace for a fast algorithm
02572 *
02573                      ITAU = 1
02574                      IWORK = ITAU + M
02575 *
02576 *                    Compute A=L*Q, copying result to VT
02577 *                    (Workspace: need 2*M, prefer M+M*NB)
02578 *
02579                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02580      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02581                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02582 *
02583 *                    Generate Q in VT
02584 *                    (Workspace: need 2*M, prefer M+M*NB)
02585 *
02586                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
02587      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02588                      IE = ITAU
02589                      ITAUQ = IE + M
02590                      ITAUP = ITAUQ + M
02591                      IWORK = ITAUP + M
02592 *
02593 *                    Zero out above L in A
02594 *
02595                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
02596      $                            LDA )
02597 *
02598 *                    Bidiagonalize L in A
02599 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02600 *
02601                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
02602      $                            WORK( ITAUQ ), WORK( ITAUP ),
02603      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02604 *
02605 *                    Multiply right vectors bidiagonalizing L by Q in VT
02606 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02607 *
02608                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
02609      $                            WORK( ITAUP ), VT, LDVT,
02610      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02611 *
02612 *                    Generate left bidiagonalizing vectors of L in A
02613 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
02614 *
02615                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
02616      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02617                      IWORK = IE + M
02618 *
02619 *                    Perform bidiagonal QR iteration, compute left
02620 *                    singular vectors of A in A and compute right
02621 *                    singular vectors of A in VT
02622 *                    (Workspace: need BDSPAC)
02623 *
02624                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
02625      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
02626      $                            INFO )
02627 *
02628                   END IF
02629 *
02630                ELSE IF( WNTUAS ) THEN
02631 *
02632 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
02633 *                         JOBVT='S')
02634 *                 M right singular vectors to be computed in VT and
02635 *                 M left singular vectors to be computed in U
02636 *
02637                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
02638 *
02639 *                    Sufficient workspace for a fast algorithm
02640 *
02641                      IU = 1
02642                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
02643 *
02644 *                       WORK(IU) is LDA by N
02645 *
02646                         LDWRKU = LDA
02647                      ELSE
02648 *
02649 *                       WORK(IU) is LDA by M
02650 *
02651                         LDWRKU = M
02652                      END IF
02653                      ITAU = IU + LDWRKU*M
02654                      IWORK = ITAU + M
02655 *
02656 *                    Compute A=L*Q
02657 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02658 *
02659                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02660      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02661 *
02662 *                    Copy L to WORK(IU), zeroing out above it
02663 *
02664                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
02665      $                            LDWRKU )
02666                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02667      $                            WORK( IU+LDWRKU ), LDWRKU )
02668 *
02669 *                    Generate Q in A
02670 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02671 *
02672                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
02673      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02674                      IE = ITAU
02675                      ITAUQ = IE + M
02676                      ITAUP = ITAUQ + M
02677                      IWORK = ITAUP + M
02678 *
02679 *                    Bidiagonalize L in WORK(IU), copying result to U
02680 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02681 *
02682                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
02683      $                            WORK( IE ), WORK( ITAUQ ),
02684      $                            WORK( ITAUP ), WORK( IWORK ),
02685      $                            LWORK-IWORK+1, IERR )
02686                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
02687      $                            LDU )
02688 *
02689 *                    Generate right bidiagonalizing vectors in WORK(IU)
02690 *                    (Workspace: need M*M+4*M-1,
02691 *                                prefer M*M+3*M+(M-1)*NB)
02692 *
02693                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
02694      $                            WORK( ITAUP ), WORK( IWORK ),
02695      $                            LWORK-IWORK+1, IERR )
02696 *
02697 *                    Generate left bidiagonalizing vectors in U
02698 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
02699 *
02700                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02701      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02702                      IWORK = IE + M
02703 *
02704 *                    Perform bidiagonal QR iteration, computing left
02705 *                    singular vectors of L in U and computing right
02706 *                    singular vectors of L in WORK(IU)
02707 *                    (Workspace: need M*M+BDSPAC)
02708 *
02709                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
02710      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
02711      $                            WORK( IWORK ), INFO )
02712 *
02713 *                    Multiply right singular vectors of L in WORK(IU) by
02714 *                    Q in A, storing result in VT
02715 *                    (Workspace: need M*M)
02716 *
02717                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
02718      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
02719 *
02720                   ELSE
02721 *
02722 *                    Insufficient workspace for a fast algorithm
02723 *
02724                      ITAU = 1
02725                      IWORK = ITAU + M
02726 *
02727 *                    Compute A=L*Q, copying result to VT
02728 *                    (Workspace: need 2*M, prefer M+M*NB)
02729 *
02730                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02731      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02732                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02733 *
02734 *                    Generate Q in VT
02735 *                    (Workspace: need 2*M, prefer M+M*NB)
02736 *
02737                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
02738      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02739 *
02740 *                    Copy L to U, zeroing out above it
02741 *
02742                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
02743                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
02744      $                            LDU )
02745                      IE = ITAU
02746                      ITAUQ = IE + M
02747                      ITAUP = ITAUQ + M
02748                      IWORK = ITAUP + M
02749 *
02750 *                    Bidiagonalize L in U
02751 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02752 *
02753                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
02754      $                            WORK( ITAUQ ), WORK( ITAUP ),
02755      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02756 *
02757 *                    Multiply right bidiagonalizing vectors in U by Q
02758 *                    in VT
02759 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02760 *
02761                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
02762      $                            WORK( ITAUP ), VT, LDVT,
02763      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02764 *
02765 *                    Generate left bidiagonalizing vectors in U
02766 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
02767 *
02768                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
02769      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02770                      IWORK = IE + M
02771 *
02772 *                    Perform bidiagonal QR iteration, computing left
02773 *                    singular vectors of A in U and computing right
02774 *                    singular vectors of A in VT
02775 *                    (Workspace: need BDSPAC)
02776 *
02777                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
02778      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
02779      $                            INFO )
02780 *
02781                   END IF
02782 *
02783                END IF
02784 *
02785             ELSE IF( WNTVA ) THEN
02786 *
02787                IF( WNTUN ) THEN
02788 *
02789 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
02790 *                 N right singular vectors to be computed in VT and
02791 *                 no left singular vectors to be computed
02792 *
02793                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
02794 *
02795 *                    Sufficient workspace for a fast algorithm
02796 *
02797                      IR = 1
02798                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
02799 *
02800 *                       WORK(IR) is LDA by M
02801 *
02802                         LDWRKR = LDA
02803                      ELSE
02804 *
02805 *                       WORK(IR) is M by M
02806 *
02807                         LDWRKR = M
02808                      END IF
02809                      ITAU = IR + LDWRKR*M
02810                      IWORK = ITAU + M
02811 *
02812 *                    Compute A=L*Q, copying result to VT
02813 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
02814 *
02815                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02816      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02817                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02818 *
02819 *                    Copy L to WORK(IR), zeroing out above it
02820 *
02821                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
02822      $                            LDWRKR )
02823                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02824      $                            WORK( IR+LDWRKR ), LDWRKR )
02825 *
02826 *                    Generate Q in VT
02827 *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
02828 *
02829                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
02830      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02831                      IE = ITAU
02832                      ITAUQ = IE + M
02833                      ITAUP = ITAUQ + M
02834                      IWORK = ITAUP + M
02835 *
02836 *                    Bidiagonalize L in WORK(IR)
02837 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
02838 *
02839                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
02840      $                            WORK( IE ), WORK( ITAUQ ),
02841      $                            WORK( ITAUP ), WORK( IWORK ),
02842      $                            LWORK-IWORK+1, IERR )
02843 *
02844 *                    Generate right bidiagonalizing vectors in WORK(IR)
02845 *                    (Workspace: need M*M+4*M-1,
02846 *                                prefer M*M+3*M+(M-1)*NB)
02847 *
02848                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
02849      $                            WORK( ITAUP ), WORK( IWORK ),
02850      $                            LWORK-IWORK+1, IERR )
02851                      IWORK = IE + M
02852 *
02853 *                    Perform bidiagonal QR iteration, computing right
02854 *                    singular vectors of L in WORK(IR)
02855 *                    (Workspace: need M*M+BDSPAC)
02856 *
02857                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
02858      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
02859      $                            WORK( IWORK ), INFO )
02860 *
02861 *                    Multiply right singular vectors of L in WORK(IR) by
02862 *                    Q in VT, storing result in A
02863 *                    (Workspace: need M*M)
02864 *
02865                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
02866      $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
02867 *
02868 *                    Copy right singular vectors of A from A to VT
02869 *
02870                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
02871 *
02872                   ELSE
02873 *
02874 *                    Insufficient workspace for a fast algorithm
02875 *
02876                      ITAU = 1
02877                      IWORK = ITAU + M
02878 *
02879 *                    Compute A=L*Q, copying result to VT
02880 *                    (Workspace: need 2*M, prefer M+M*NB)
02881 *
02882                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02883      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02884                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02885 *
02886 *                    Generate Q in VT
02887 *                    (Workspace: need M+N, prefer M+N*NB)
02888 *
02889                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
02890      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02891                      IE = ITAU
02892                      ITAUQ = IE + M
02893                      ITAUP = ITAUQ + M
02894                      IWORK = ITAUP + M
02895 *
02896 *                    Zero out above L in A
02897 *
02898                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
02899      $                            LDA )
02900 *
02901 *                    Bidiagonalize L in A
02902 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
02903 *
02904                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
02905      $                            WORK( ITAUQ ), WORK( ITAUP ),
02906      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02907 *
02908 *                    Multiply right bidiagonalizing vectors in A by Q
02909 *                    in VT
02910 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
02911 *
02912                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
02913      $                            WORK( ITAUP ), VT, LDVT,
02914      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02915                      IWORK = IE + M
02916 *
02917 *                    Perform bidiagonal QR iteration, computing right
02918 *                    singular vectors of A in VT
02919 *                    (Workspace: need BDSPAC)
02920 *
02921                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
02922      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
02923      $                            INFO )
02924 *
02925                   END IF
02926 *
02927                ELSE IF( WNTUO ) THEN
02928 *
02929 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
02930 *                 N right singular vectors to be computed in VT and
02931 *                 M left singular vectors to be overwritten on A
02932 *
02933                   IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
02934 *
02935 *                    Sufficient workspace for a fast algorithm
02936 *
02937                      IU = 1
02938                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
02939 *
02940 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
02941 *
02942                         LDWRKU = LDA
02943                         IR = IU + LDWRKU*M
02944                         LDWRKR = LDA
02945                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
02946 *
02947 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
02948 *
02949                         LDWRKU = LDA
02950                         IR = IU + LDWRKU*M
02951                         LDWRKR = M
02952                      ELSE
02953 *
02954 *                       WORK(IU) is M by M and WORK(IR) is M by M
02955 *
02956                         LDWRKU = M
02957                         IR = IU + LDWRKU*M
02958                         LDWRKR = M
02959                      END IF
02960                      ITAU = IR + LDWRKR*M
02961                      IWORK = ITAU + M
02962 *
02963 *                    Compute A=L*Q, copying result to VT
02964 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
02965 *
02966                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
02967      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02968                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
02969 *
02970 *                    Generate Q in VT
02971 *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
02972 *
02973                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
02974      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
02975 *
02976 *                    Copy L to WORK(IU), zeroing out above it
02977 *
02978                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
02979      $                            LDWRKU )
02980                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
02981      $                            WORK( IU+LDWRKU ), LDWRKU )
02982                      IE = ITAU
02983                      ITAUQ = IE + M
02984                      ITAUP = ITAUQ + M
02985                      IWORK = ITAUP + M
02986 *
02987 *                    Bidiagonalize L in WORK(IU), copying result to
02988 *                    WORK(IR)
02989 *                    (Workspace: need 2*M*M+4*M,
02990 *                                prefer 2*M*M+3*M+2*M*NB)
02991 *
02992                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
02993      $                            WORK( IE ), WORK( ITAUQ ),
02994      $                            WORK( ITAUP ), WORK( IWORK ),
02995      $                            LWORK-IWORK+1, IERR )
02996                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
02997      $                            WORK( IR ), LDWRKR )
02998 *
02999 *                    Generate right bidiagonalizing vectors in WORK(IU)
03000 *                    (Workspace: need 2*M*M+4*M-1,
03001 *                                prefer 2*M*M+3*M+(M-1)*NB)
03002 *
03003                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
03004      $                            WORK( ITAUP ), WORK( IWORK ),
03005      $                            LWORK-IWORK+1, IERR )
03006 *
03007 *                    Generate left bidiagonalizing vectors in WORK(IR)
03008 *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
03009 *
03010                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
03011      $                            WORK( ITAUQ ), WORK( IWORK ),
03012      $                            LWORK-IWORK+1, IERR )
03013                      IWORK = IE + M
03014 *
03015 *                    Perform bidiagonal QR iteration, computing left
03016 *                    singular vectors of L in WORK(IR) and computing
03017 *                    right singular vectors of L in WORK(IU)
03018 *                    (Workspace: need 2*M*M+BDSPAC)
03019 *
03020                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
03021      $                            WORK( IU ), LDWRKU, WORK( IR ),
03022      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
03023 *
03024 *                    Multiply right singular vectors of L in WORK(IU) by
03025 *                    Q in VT, storing result in A
03026 *                    (Workspace: need M*M)
03027 *
03028                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
03029      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
03030 *
03031 *                    Copy right singular vectors of A from A to VT
03032 *
03033                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
03034 *
03035 *                    Copy left singular vectors of A from WORK(IR) to A
03036 *
03037                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
03038      $                            LDA )
03039 *
03040                   ELSE
03041 *
03042 *                    Insufficient workspace for a fast algorithm
03043 *
03044                      ITAU = 1
03045                      IWORK = ITAU + M
03046 *
03047 *                    Compute A=L*Q, copying result to VT
03048 *                    (Workspace: need 2*M, prefer M+M*NB)
03049 *
03050                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03051      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03052                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03053 *
03054 *                    Generate Q in VT
03055 *                    (Workspace: need M+N, prefer M+N*NB)
03056 *
03057                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03058      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03059                      IE = ITAU
03060                      ITAUQ = IE + M
03061                      ITAUP = ITAUQ + M
03062                      IWORK = ITAUP + M
03063 *
03064 *                    Zero out above L in A
03065 *
03066                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
03067      $                            LDA )
03068 *
03069 *                    Bidiagonalize L in A
03070 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
03071 *
03072                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
03073      $                            WORK( ITAUQ ), WORK( ITAUP ),
03074      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03075 *
03076 *                    Multiply right bidiagonalizing vectors in A by Q
03077 *                    in VT
03078 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
03079 *
03080                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
03081      $                            WORK( ITAUP ), VT, LDVT,
03082      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03083 *
03084 *                    Generate left bidiagonalizing vectors in A
03085 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
03086 *
03087                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
03088      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03089                      IWORK = IE + M
03090 *
03091 *                    Perform bidiagonal QR iteration, computing left
03092 *                    singular vectors of A in A and computing right
03093 *                    singular vectors of A in VT
03094 *                    (Workspace: need BDSPAC)
03095 *
03096                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
03097      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
03098      $                            INFO )
03099 *
03100                   END IF
03101 *
03102                ELSE IF( WNTUAS ) THEN
03103 *
03104 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
03105 *                         JOBVT='A')
03106 *                 N right singular vectors to be computed in VT and
03107 *                 M left singular vectors to be computed in U
03108 *
03109                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
03110 *
03111 *                    Sufficient workspace for a fast algorithm
03112 *
03113                      IU = 1
03114                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
03115 *
03116 *                       WORK(IU) is LDA by M
03117 *
03118                         LDWRKU = LDA
03119                      ELSE
03120 *
03121 *                       WORK(IU) is M by M
03122 *
03123                         LDWRKU = M
03124                      END IF
03125                      ITAU = IU + LDWRKU*M
03126                      IWORK = ITAU + M
03127 *
03128 *                    Compute A=L*Q, copying result to VT
03129 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
03130 *
03131                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03132      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03133                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03134 *
03135 *                    Generate Q in VT
03136 *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
03137 *
03138                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03139      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03140 *
03141 *                    Copy L to WORK(IU), zeroing out above it
03142 *
03143                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
03144      $                            LDWRKU )
03145                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
03146      $                            WORK( IU+LDWRKU ), LDWRKU )
03147                      IE = ITAU
03148                      ITAUQ = IE + M
03149                      ITAUP = ITAUQ + M
03150                      IWORK = ITAUP + M
03151 *
03152 *                    Bidiagonalize L in WORK(IU), copying result to U
03153 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
03154 *
03155                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
03156      $                            WORK( IE ), WORK( ITAUQ ),
03157      $                            WORK( ITAUP ), WORK( IWORK ),
03158      $                            LWORK-IWORK+1, IERR )
03159                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
03160      $                            LDU )
03161 *
03162 *                    Generate right bidiagonalizing vectors in WORK(IU)
03163 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
03164 *
03165                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
03166      $                            WORK( ITAUP ), WORK( IWORK ),
03167      $                            LWORK-IWORK+1, IERR )
03168 *
03169 *                    Generate left bidiagonalizing vectors in U
03170 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
03171 *
03172                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
03173      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03174                      IWORK = IE + M
03175 *
03176 *                    Perform bidiagonal QR iteration, computing left
03177 *                    singular vectors of L in U and computing right
03178 *                    singular vectors of L in WORK(IU)
03179 *                    (Workspace: need M*M+BDSPAC)
03180 *
03181                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
03182      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
03183      $                            WORK( IWORK ), INFO )
03184 *
03185 *                    Multiply right singular vectors of L in WORK(IU) by
03186 *                    Q in VT, storing result in A
03187 *                    (Workspace: need M*M)
03188 *
03189                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
03190      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
03191 *
03192 *                    Copy right singular vectors of A from A to VT
03193 *
03194                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
03195 *
03196                   ELSE
03197 *
03198 *                    Insufficient workspace for a fast algorithm
03199 *
03200                      ITAU = 1
03201                      IWORK = ITAU + M
03202 *
03203 *                    Compute A=L*Q, copying result to VT
03204 *                    (Workspace: need 2*M, prefer M+M*NB)
03205 *
03206                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
03207      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03208                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03209 *
03210 *                    Generate Q in VT
03211 *                    (Workspace: need M+N, prefer M+N*NB)
03212 *
03213                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
03214      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03215 *
03216 *                    Copy L to U, zeroing out above it
03217 *
03218                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
03219                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
03220      $                            LDU )
03221                      IE = ITAU
03222                      ITAUQ = IE + M
03223                      ITAUP = ITAUQ + M
03224                      IWORK = ITAUP + M
03225 *
03226 *                    Bidiagonalize L in U
03227 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
03228 *
03229                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
03230      $                            WORK( ITAUQ ), WORK( ITAUP ),
03231      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03232 *
03233 *                    Multiply right bidiagonalizing vectors in U by Q
03234 *                    in VT
03235 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
03236 *
03237                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
03238      $                            WORK( ITAUP ), VT, LDVT,
03239      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03240 *
03241 *                    Generate left bidiagonalizing vectors in U
03242 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
03243 *
03244                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
03245      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
03246                      IWORK = IE + M
03247 *
03248 *                    Perform bidiagonal QR iteration, computing left
03249 *                    singular vectors of A in U and computing right
03250 *                    singular vectors of A in VT
03251 *                    (Workspace: need BDSPAC)
03252 *
03253                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
03254      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
03255      $                            INFO )
03256 *
03257                   END IF
03258 *
03259                END IF
03260 *
03261             END IF
03262 *
03263          ELSE
03264 *
03265 *           N .LT. MNTHR
03266 *
03267 *           Path 10t(N greater than M, but not much larger)
03268 *           Reduce to bidiagonal form without LQ decomposition
03269 *
03270             IE = 1
03271             ITAUQ = IE + M
03272             ITAUP = ITAUQ + M
03273             IWORK = ITAUP + M
03274 *
03275 *           Bidiagonalize A
03276 *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
03277 *
03278             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
03279      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
03280      $                   IERR )
03281             IF( WNTUAS ) THEN
03282 *
03283 *              If left singular vectors desired in U, copy result to U
03284 *              and generate left bidiagonalizing vectors in U
03285 *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
03286 *
03287                CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
03288                CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
03289      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03290             END IF
03291             IF( WNTVAS ) THEN
03292 *
03293 *              If right singular vectors desired in VT, copy result to
03294 *              VT and generate right bidiagonalizing vectors in VT
03295 *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
03296 *
03297                CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
03298                IF( WNTVA )
03299      $            NRVT = N
03300                IF( WNTVS )
03301      $            NRVT = M
03302                CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
03303      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03304             END IF
03305             IF( WNTUO ) THEN
03306 *
03307 *              If left singular vectors desired in A, generate left
03308 *              bidiagonalizing vectors in A
03309 *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
03310 *
03311                CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
03312      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03313             END IF
03314             IF( WNTVO ) THEN
03315 *
03316 *              If right singular vectors desired in A, generate right
03317 *              bidiagonalizing vectors in A
03318 *              (Workspace: need 4*M, prefer 3*M+M*NB)
03319 *
03320                CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
03321      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
03322             END IF
03323             IWORK = IE + M
03324             IF( WNTUAS .OR. WNTUO )
03325      $         NRU = M
03326             IF( WNTUN )
03327      $         NRU = 0
03328             IF( WNTVAS .OR. WNTVO )
03329      $         NCVT = N
03330             IF( WNTVN )
03331      $         NCVT = 0
03332             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
03333 *
03334 *              Perform bidiagonal QR iteration, if desired, computing
03335 *              left singular vectors in U and computing right singular
03336 *              vectors in VT
03337 *              (Workspace: need BDSPAC)
03338 *
03339                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
03340      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
03341             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
03342 *
03343 *              Perform bidiagonal QR iteration, if desired, computing
03344 *              left singular vectors in U and computing right singular
03345 *              vectors in A
03346 *              (Workspace: need BDSPAC)
03347 *
03348                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
03349      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
03350             ELSE
03351 *
03352 *              Perform bidiagonal QR iteration, if desired, computing
03353 *              left singular vectors in A and computing right singular
03354 *              vectors in VT
03355 *              (Workspace: need BDSPAC)
03356 *
03357                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
03358      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
03359             END IF
03360 *
03361          END IF
03362 *
03363       END IF
03364 *
03365 *     If SBDSQR failed to converge, copy unconverged superdiagonals
03366 *     to WORK( 2:MINMN )
03367 *
03368       IF( INFO.NE.0 ) THEN
03369          IF( IE.GT.2 ) THEN
03370             DO 50 I = 1, MINMN - 1
03371                WORK( I+1 ) = WORK( I+IE-1 )
03372    50       CONTINUE
03373          END IF
03374          IF( IE.LT.2 ) THEN
03375             DO 60 I = MINMN - 1, 1, -1
03376                WORK( I+1 ) = WORK( I+IE-1 )
03377    60       CONTINUE
03378          END IF
03379       END IF
03380 *
03381 *     Undo scaling if necessary
03382 *
03383       IF( ISCL.EQ.1 ) THEN
03384          IF( ANRM.GT.BIGNUM )
03385      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
03386      $                   IERR )
03387          IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
03388      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
03389      $                   MINMN, IERR )
03390          IF( ANRM.LT.SMLNUM )
03391      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
03392      $                   IERR )
03393          IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
03394      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
03395      $                   MINMN, IERR )
03396       END IF
03397 *
03398 *     Return optimal workspace in WORK(1)
03399 *
03400       WORK( 1 ) = MAXWRK
03401 *
03402       RETURN
03403 *
03404 *     End of SGESVD
03405 *
03406       END
 All Files Functions