0001:       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
0002:      $                   WORK, LWORK, INFO )
0003: *
0004: *  -- LAPACK driver routine (version 3.2) --
0005: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
0006: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
0007: *     November 2006
0008: *
0009: *     .. Scalar Arguments ..
0010:       CHARACTER          JOBU, JOBVT
0011:       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
0012: *     ..
0013: *     .. Array Arguments ..
0014:       REAL               A( LDA, * ), S( * ), U( LDU, * ),
0015:      $                   VT( LDVT, * ), WORK( * )
0016: *     ..
0017: *
0018: *  Purpose
0019: *  =======
0020: *
0021: *  SGESVD computes the singular value decomposition (SVD) of a real
0022: *  M-by-N matrix A, optionally computing the left and/or right singular
0023: *  vectors. The SVD is written
0024: *
0025: *       A = U * SIGMA * transpose(V)
0026: *
0027: *  where SIGMA is an M-by-N matrix which is zero except for its
0028: *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
0029: *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
0030: *  are the singular values of A; they are real and non-negative, and
0031: *  are returned in descending order.  The first min(m,n) columns of
0032: *  U and V are the left and right singular vectors of A.
0033: *
0034: *  Note that the routine returns V**T, not V.
0035: *
0036: *  Arguments
0037: *  =========
0038: *
0039: *  JOBU    (input) CHARACTER*1
0040: *          Specifies options for computing all or part of the matrix U:
0041: *          = 'A':  all M columns of U are returned in array U:
0042: *          = 'S':  the first min(m,n) columns of U (the left singular
0043: *                  vectors) are returned in the array U;
0044: *          = 'O':  the first min(m,n) columns of U (the left singular
0045: *                  vectors) are overwritten on the array A;
0046: *          = 'N':  no columns of U (no left singular vectors) are
0047: *                  computed.
0048: *
0049: *  JOBVT   (input) CHARACTER*1
0050: *          Specifies options for computing all or part of the matrix
0051: *          V**T:
0052: *          = 'A':  all N rows of V**T are returned in the array VT;
0053: *          = 'S':  the first min(m,n) rows of V**T (the right singular
0054: *                  vectors) are returned in the array VT;
0055: *          = 'O':  the first min(m,n) rows of V**T (the right singular
0056: *                  vectors) are overwritten on the array A;
0057: *          = 'N':  no rows of V**T (no right singular vectors) are
0058: *                  computed.
0059: *
0060: *          JOBVT and JOBU cannot both be 'O'.
0061: *
0062: *  M       (input) INTEGER
0063: *          The number of rows of the input matrix A.  M >= 0.
0064: *
0065: *  N       (input) INTEGER
0066: *          The number of columns of the input matrix A.  N >= 0.
0067: *
0068: *  A       (input/output) REAL array, dimension (LDA,N)
0069: *          On entry, the M-by-N matrix A.
0070: *          On exit,
0071: *          if JOBU = 'O',  A is overwritten with the first min(m,n)
0072: *                          columns of U (the left singular vectors,
0073: *                          stored columnwise);
0074: *          if JOBVT = 'O', A is overwritten with the first min(m,n)
0075: *                          rows of V**T (the right singular vectors,
0076: *                          stored rowwise);
0077: *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
0078: *                          are destroyed.
0079: *
0080: *  LDA     (input) INTEGER
0081: *          The leading dimension of the array A.  LDA >= max(1,M).
0082: *
0083: *  S       (output) REAL array, dimension (min(M,N))
0084: *          The singular values of A, sorted so that S(i) >= S(i+1).
0085: *
0086: *  U       (output) REAL array, dimension (LDU,UCOL)
0087: *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
0088: *          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
0089: *          if JOBU = 'S', U contains the first min(m,n) columns of U
0090: *          (the left singular vectors, stored columnwise);
0091: *          if JOBU = 'N' or 'O', U is not referenced.
0092: *
0093: *  LDU     (input) INTEGER
0094: *          The leading dimension of the array U.  LDU >= 1; if
0095: *          JOBU = 'S' or 'A', LDU >= M.
0096: *
0097: *  VT      (output) REAL array, dimension (LDVT,N)
0098: *          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
0099: *          V**T;
0100: *          if JOBVT = 'S', VT contains the first min(m,n) rows of
0101: *          V**T (the right singular vectors, stored rowwise);
0102: *          if JOBVT = 'N' or 'O', VT is not referenced.
0103: *
0104: *  LDVT    (input) INTEGER
0105: *          The leading dimension of the array VT.  LDVT >= 1; if
0106: *          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
0107: *
0108: *  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
0109: *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
0110: *          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
0111: *          superdiagonal elements of an upper bidiagonal matrix B
0112: *          whose diagonal is in S (not necessarily sorted). B
0113: *          satisfies A = U * B * VT, so it has the same singular values
0114: *          as A, and singular vectors related by U and VT.
0115: *
0116: *  LWORK   (input) INTEGER
0117: *          The dimension of the array WORK.
0118: *          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
0119: *          For good performance, LWORK should generally be larger.
0120: *
0121: *          If LWORK = -1, then a workspace query is assumed; the routine
0122: *          only calculates the optimal size of the WORK array, returns
0123: *          this value as the first entry of the WORK array, and no error
0124: *          message related to LWORK is issued by XERBLA.
0125: *
0126: *  INFO    (output) INTEGER
0127: *          = 0:  successful exit.
0128: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
0129: *          > 0:  if SBDSQR did not converge, INFO specifies how many
0130: *                superdiagonals of an intermediate bidiagonal form B
0131: *                did not converge to zero. See the description of WORK
0132: *                above for details.
0133: *
0134: *  =====================================================================
0135: *
0136: *     .. Parameters ..
0137:       REAL               ZERO, ONE
0138:       PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
0139: *     ..
0140: *     .. Local Scalars ..
0141:       LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
0142:      $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
0143:       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
0144:      $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
0145:      $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
0146:      $                   NRVT, WRKBL
0147:       REAL               ANRM, BIGNUM, EPS, SMLNUM
0148: *     ..
0149: *     .. Local Arrays ..
0150:       REAL               DUM( 1 )
0151: *     ..
0152: *     .. External Subroutines ..
0153:       EXTERNAL           SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
0154:      $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
0155:      $                   XERBLA
0156: *     ..
0157: *     .. External Functions ..
0158:       LOGICAL            LSAME
0159:       INTEGER            ILAENV
0160:       REAL               SLAMCH, SLANGE
0161:       EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
0162: *     ..
0163: *     .. Intrinsic Functions ..
0164:       INTRINSIC          MAX, MIN, SQRT
0165: *     ..
0166: *     .. Executable Statements ..
0167: *
0168: *     Test the input arguments
0169: *
0170:       INFO = 0
0171:       MINMN = MIN( M, N )
0172:       WNTUA = LSAME( JOBU, 'A' )
0173:       WNTUS = LSAME( JOBU, 'S' )
0174:       WNTUAS = WNTUA .OR. WNTUS
0175:       WNTUO = LSAME( JOBU, 'O' )
0176:       WNTUN = LSAME( JOBU, 'N' )
0177:       WNTVA = LSAME( JOBVT, 'A' )
0178:       WNTVS = LSAME( JOBVT, 'S' )
0179:       WNTVAS = WNTVA .OR. WNTVS
0180:       WNTVO = LSAME( JOBVT, 'O' )
0181:       WNTVN = LSAME( JOBVT, 'N' )
0182:       LQUERY = ( LWORK.EQ.-1 )
0183: *
0184:       IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
0185:          INFO = -1
0186:       ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
0187:      $         ( WNTVO .AND. WNTUO ) ) THEN
0188:          INFO = -2
0189:       ELSE IF( M.LT.0 ) THEN
0190:          INFO = -3
0191:       ELSE IF( N.LT.0 ) THEN
0192:          INFO = -4
0193:       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
0194:          INFO = -6
0195:       ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
0196:          INFO = -9
0197:       ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
0198:      $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
0199:          INFO = -11
0200:       END IF
0201: *
0202: *     Compute workspace
0203: *      (Note: Comments in the code beginning "Workspace:" describe the
0204: *       minimal amount of workspace needed at that point in the code,
0205: *       as well as the preferred amount for good performance.
0206: *       NB refers to the optimal block size for the immediately
0207: *       following subroutine, as returned by ILAENV.)
0208: *
0209:       IF( INFO.EQ.0 ) THEN
0210:          MINWRK = 1
0211:          MAXWRK = 1
0212:          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
0213: *
0214: *           Compute space needed for SBDSQR
0215: *
0216:             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
0217:             BDSPAC = 5*N
0218:             IF( M.GE.MNTHR ) THEN
0219:                IF( WNTUN ) THEN
0220: *
0221: *                 Path 1 (M much larger than N, JOBU='N')
0222: *
0223:                   MAXWRK = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1,
0224:      $                     -1 )
0225:                   MAXWRK = MAX( MAXWRK, 3*N+2*N*
0226:      $                     ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0227:                   IF( WNTVO .OR. WNTVAS )
0228:      $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
0229:      $                        ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0230:                   MAXWRK = MAX( MAXWRK, BDSPAC )
0231:                   MINWRK = MAX( 4*N, BDSPAC )
0232:                ELSE IF( WNTUO .AND. WNTVN ) THEN
0233: *
0234: *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
0235: *
0236:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0237:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0238:      $                    N, N, -1 ) )
0239:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0240:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0241:                   WRKBL = MAX( WRKBL, 3*N+N*
0242:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0243:                   WRKBL = MAX( WRKBL, BDSPAC )
0244:                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
0245:                   MINWRK = MAX( 3*N+M, BDSPAC )
0246:                ELSE IF( WNTUO .AND. WNTVAS ) THEN
0247: *
0248: *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
0249: *                 'A')
0250: *
0251:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0252:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0253:      $                    N, N, -1 ) )
0254:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0255:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0256:                   WRKBL = MAX( WRKBL, 3*N+N*
0257:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0258:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0259:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0260:                   WRKBL = MAX( WRKBL, BDSPAC )
0261:                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
0262:                   MINWRK = MAX( 3*N+M, BDSPAC )
0263:                ELSE IF( WNTUS .AND. WNTVN ) THEN
0264: *
0265: *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
0266: *
0267:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0268:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0269:      $                    N, N, -1 ) )
0270:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0271:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0272:                   WRKBL = MAX( WRKBL, 3*N+N*
0273:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0274:                   WRKBL = MAX( WRKBL, BDSPAC )
0275:                   MAXWRK = N*N + WRKBL
0276:                   MINWRK = MAX( 3*N+M, BDSPAC )
0277:                ELSE IF( WNTUS .AND. WNTVO ) THEN
0278: *
0279: *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
0280: *
0281:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0282:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0283:      $                    N, N, -1 ) )
0284:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0285:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0286:                   WRKBL = MAX( WRKBL, 3*N+N*
0287:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0288:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0289:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0290:                   WRKBL = MAX( WRKBL, BDSPAC )
0291:                   MAXWRK = 2*N*N + WRKBL
0292:                   MINWRK = MAX( 3*N+M, BDSPAC )
0293:                ELSE IF( WNTUS .AND. WNTVAS ) THEN
0294: *
0295: *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
0296: *                 'A')
0297: *
0298:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0299:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0300:      $                    N, N, -1 ) )
0301:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0302:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0303:                   WRKBL = MAX( WRKBL, 3*N+N*
0304:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0305:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0306:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0307:                   WRKBL = MAX( WRKBL, BDSPAC )
0308:                   MAXWRK = N*N + WRKBL
0309:                   MINWRK = MAX( 3*N+M, BDSPAC )
0310:                ELSE IF( WNTUA .AND. WNTVN ) THEN
0311: *
0312: *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
0313: *
0314:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0315:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
0316:      $                    M, N, -1 ) )
0317:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0318:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0319:                   WRKBL = MAX( WRKBL, 3*N+N*
0320:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0321:                   WRKBL = MAX( WRKBL, BDSPAC )
0322:                   MAXWRK = N*N + WRKBL
0323:                   MINWRK = MAX( 3*N+M, BDSPAC )
0324:                ELSE IF( WNTUA .AND. WNTVO ) THEN
0325: *
0326: *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
0327: *
0328:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0329:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
0330:      $                    M, N, -1 ) )
0331:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0332:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0333:                   WRKBL = MAX( WRKBL, 3*N+N*
0334:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0335:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0336:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0337:                   WRKBL = MAX( WRKBL, BDSPAC )
0338:                   MAXWRK = 2*N*N + WRKBL
0339:                   MINWRK = MAX( 3*N+M, BDSPAC )
0340:                ELSE IF( WNTUA .AND. WNTVAS ) THEN
0341: *
0342: *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
0343: *                 'A')
0344: *
0345:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0346:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
0347:      $                    M, N, -1 ) )
0348:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0349:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0350:                   WRKBL = MAX( WRKBL, 3*N+N*
0351:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0352:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0353:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0354:                   WRKBL = MAX( WRKBL, BDSPAC )
0355:                   MAXWRK = N*N + WRKBL
0356:                   MINWRK = MAX( 3*N+M, BDSPAC )
0357:                END IF
0358:             ELSE
0359: *
0360: *              Path 10 (M at least N, but not much larger)
0361: *
0362:                MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
0363:      $                  -1, -1 )
0364:                IF( WNTUS .OR. WNTUO )
0365:      $            MAXWRK = MAX( MAXWRK, 3*N+N*
0366:      $                     ILAENV( 1, 'SORGBR', 'Q', M, N, N, -1 ) )
0367:                IF( WNTUA )
0368:      $            MAXWRK = MAX( MAXWRK, 3*N+M*
0369:      $                     ILAENV( 1, 'SORGBR', 'Q', M, M, N, -1 ) )
0370:                IF( .NOT.WNTVN )
0371:      $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
0372:      $                     ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0373:                MAXWRK = MAX( MAXWRK, BDSPAC )
0374:                MINWRK = MAX( 3*N+M, BDSPAC )
0375:             END IF
0376:          ELSE IF( MINMN.GT.0 ) THEN
0377: *
0378: *           Compute space needed for SBDSQR
0379: *
0380:             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
0381:             BDSPAC = 5*M
0382:             IF( N.GE.MNTHR ) THEN
0383:                IF( WNTVN ) THEN
0384: *
0385: *                 Path 1t(N much larger than M, JOBVT='N')
0386: *
0387:                   MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1,
0388:      $                     -1 )
0389:                   MAXWRK = MAX( MAXWRK, 3*M+2*M*
0390:      $                     ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0391:                   IF( WNTUO .OR. WNTUAS )
0392:      $               MAXWRK = MAX( MAXWRK, 3*M+M*
0393:      $                        ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0394:                   MAXWRK = MAX( MAXWRK, BDSPAC )
0395:                   MINWRK = MAX( 4*M, BDSPAC )
0396:                ELSE IF( WNTVO .AND. WNTUN ) THEN
0397: *
0398: *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
0399: *
0400:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0401:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0402:      $                    N, M, -1 ) )
0403:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0404:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0405:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0406:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0407:                   WRKBL = MAX( WRKBL, BDSPAC )
0408:                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
0409:                   MINWRK = MAX( 3*M+N, BDSPAC )
0410:                ELSE IF( WNTVO .AND. WNTUAS ) THEN
0411: *
0412: *                 Path 3t(N much larger than M, JOBU='S' or 'A',
0413: *                 JOBVT='O')
0414: *
0415:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0416:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0417:      $                    N, M, -1 ) )
0418:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0419:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0420:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0421:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0422:                   WRKBL = MAX( WRKBL, 3*M+M*
0423:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0424:                   WRKBL = MAX( WRKBL, BDSPAC )
0425:                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
0426:                   MINWRK = MAX( 3*M+N, BDSPAC )
0427:                ELSE IF( WNTVS .AND. WNTUN ) THEN
0428: *
0429: *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
0430: *
0431:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0432:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0433:      $                    N, M, -1 ) )
0434:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0435:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0436:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0437:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0438:                   WRKBL = MAX( WRKBL, BDSPAC )
0439:                   MAXWRK = M*M + WRKBL
0440:                   MINWRK = MAX( 3*M+N, BDSPAC )
0441:                ELSE IF( WNTVS .AND. WNTUO ) THEN
0442: *
0443: *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
0444: *
0445:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0446:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0447:      $                    N, M, -1 ) )
0448:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0449:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0450:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0451:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0452:                   WRKBL = MAX( WRKBL, 3*M+M*
0453:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0454:                   WRKBL = MAX( WRKBL, BDSPAC )
0455:                   MAXWRK = 2*M*M + WRKBL
0456:                   MINWRK = MAX( 3*M+N, BDSPAC )
0457:                   MAXWRK = MAX( MAXWRK, MINWRK )
0458:                ELSE IF( WNTVS .AND. WNTUAS ) THEN
0459: *
0460: *                 Path 6t(N much larger than M, JOBU='S' or 'A',
0461: *                 JOBVT='S')
0462: *
0463:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0464:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0465:      $                    N, M, -1 ) )
0466:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0467:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0468:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0469:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0470:                   WRKBL = MAX( WRKBL, 3*M+M*
0471:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0472:                   WRKBL = MAX( WRKBL, BDSPAC )
0473:                   MAXWRK = M*M + WRKBL
0474:                   MINWRK = MAX( 3*M+N, BDSPAC )
0475:                ELSE IF( WNTVA .AND. WNTUN ) THEN
0476: *
0477: *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
0478: *
0479:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0480:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
0481:      $                    N, M, -1 ) )
0482:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0483:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0484:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0485:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0486:                   WRKBL = MAX( WRKBL, BDSPAC )
0487:                   MAXWRK = M*M + WRKBL
0488:                   MINWRK = MAX( 3*M+N, BDSPAC )
0489:                ELSE IF( WNTVA .AND. WNTUO ) THEN
0490: *
0491: *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
0492: *
0493:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0494:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
0495:      $                    N, M, -1 ) )
0496:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0497:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0498:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0499:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0500:                   WRKBL = MAX( WRKBL, 3*M+M*
0501:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0502:                   WRKBL = MAX( WRKBL, BDSPAC )
0503:                   MAXWRK = 2*M*M + WRKBL
0504:                   MINWRK = MAX( 3*M+N, BDSPAC )
0505:                ELSE IF( WNTVA .AND. WNTUAS ) THEN
0506: *
0507: *                 Path 9t(N much larger than M, JOBU='S' or 'A',
0508: *                 JOBVT='A')
0509: *
0510:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0511:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
0512:      $                    N, M, -1 ) )
0513:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0514:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0515:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0516:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0517:                   WRKBL = MAX( WRKBL, 3*M+M*
0518:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0519:                   WRKBL = MAX( WRKBL, BDSPAC )
0520:                   MAXWRK = M*M + WRKBL
0521:                   MINWRK = MAX( 3*M+N, BDSPAC )
0522:                END IF
0523:             ELSE
0524: *
0525: *              Path 10t(N greater than M, but not much larger)
0526: *
0527:                MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
0528:      $                  -1, -1 )
0529:                IF( WNTVS .OR. WNTVO )
0530:      $            MAXWRK = MAX( MAXWRK, 3*M+M*
0531:      $                     ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) )
0532:                IF( WNTVA )
0533:      $            MAXWRK = MAX( MAXWRK, 3*M+N*
0534:      $                     ILAENV( 1, 'SORGBR', 'P', N, N, M, -1 ) )
0535:                IF( .NOT.WNTUN )
0536:      $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
0537:      $                     ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0538:                MAXWRK = MAX( MAXWRK, BDSPAC )
0539:                MINWRK = MAX( 3*M+N, BDSPAC )
0540:             END IF
0541:          END IF
0542:          MAXWRK = MAX( MAXWRK, MINWRK )
0543:          WORK( 1 ) = MAXWRK
0544: *
0545:          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
0546:             INFO = -13
0547:          END IF
0548:       END IF
0549: *
0550:       IF( INFO.NE.0 ) THEN
0551:          CALL XERBLA( 'SGESVD', -INFO )
0552:          RETURN
0553:       ELSE IF( LQUERY ) THEN
0554:          RETURN
0555:       END IF
0556: *
0557: *     Quick return if possible
0558: *
0559:       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
0560:          RETURN
0561:       END IF
0562: *
0563: *     Get machine constants
0564: *
0565:       EPS = SLAMCH( 'P' )
0566:       SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
0567:       BIGNUM = ONE / SMLNUM
0568: *
0569: *     Scale A if max element outside range [SMLNUM,BIGNUM]
0570: *
0571:       ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
0572:       ISCL = 0
0573:       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
0574:          ISCL = 1
0575:          CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
0576:       ELSE IF( ANRM.GT.BIGNUM ) THEN
0577:          ISCL = 1
0578:          CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
0579:       END IF
0580: *
0581:       IF( M.GE.N ) THEN
0582: *
0583: *        A has at least as many rows as columns. If A has sufficiently
0584: *        more rows than columns, first reduce using the QR
0585: *        decomposition (if sufficient workspace available)
0586: *
0587:          IF( M.GE.MNTHR ) THEN
0588: *
0589:             IF( WNTUN ) THEN
0590: *
0591: *              Path 1 (M much larger than N, JOBU='N')
0592: *              No left singular vectors to be computed
0593: *
0594:                ITAU = 1
0595:                IWORK = ITAU + N
0596: *
0597: *              Compute A=Q*R
0598: *              (Workspace: need 2*N, prefer N+N*NB)
0599: *
0600:                CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
0601:      $                      LWORK-IWORK+1, IERR )
0602: *
0603: *              Zero out below R
0604: *
0605:                CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
0606:                IE = 1
0607:                ITAUQ = IE + N
0608:                ITAUP = ITAUQ + N
0609:                IWORK = ITAUP + N
0610: *
0611: *              Bidiagonalize R in A
0612: *              (Workspace: need 4*N, prefer 3*N+2*N*NB)
0613: *
0614:                CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
0615:      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
0616:      $                      IERR )
0617:                NCVT = 0
0618:                IF( WNTVO .OR. WNTVAS ) THEN
0619: *
0620: *                 If right singular vectors desired, generate P'.
0621: *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
0622: *
0623:                   CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
0624:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0625:                   NCVT = N
0626:                END IF
0627:                IWORK = IE + N
0628: *
0629: *              Perform bidiagonal QR iteration, computing right
0630: *              singular vectors of A in A if desired
0631: *              (Workspace: need BDSPAC)
0632: *
0633:                CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
0634:      $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
0635: *
0636: *              If right singular vectors desired in VT, copy them there
0637: *
0638:                IF( WNTVAS )
0639:      $            CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT )
0640: *
0641:             ELSE IF( WNTUO .AND. WNTVN ) THEN
0642: *
0643: *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
0644: *              N left singular vectors to be overwritten on A and
0645: *              no right singular vectors to be computed
0646: *
0647:                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0648: *
0649: *                 Sufficient workspace for a fast algorithm
0650: *
0651:                   IR = 1
0652:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
0653: *
0654: *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
0655: *
0656:                      LDWRKU = LDA
0657:                      LDWRKR = LDA
0658:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
0659: *
0660: *                    WORK(IU) is LDA by N, WORK(IR) is N by N
0661: *
0662:                      LDWRKU = LDA
0663:                      LDWRKR = N
0664:                   ELSE
0665: *
0666: *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
0667: *
0668:                      LDWRKU = ( LWORK-N*N-N ) / N
0669:                      LDWRKR = N
0670:                   END IF
0671:                   ITAU = IR + LDWRKR*N
0672:                   IWORK = ITAU + N
0673: *
0674: *                 Compute A=Q*R
0675: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0676: *
0677:                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0678:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0679: *
0680: *                 Copy R to WORK(IR) and zero out below it
0681: *
0682:                   CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
0683:                   CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
0684:      $                         LDWRKR )
0685: *
0686: *                 Generate Q in A
0687: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0688: *
0689:                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0690:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0691:                   IE = ITAU
0692:                   ITAUQ = IE + N
0693:                   ITAUP = ITAUQ + N
0694:                   IWORK = ITAUP + N
0695: *
0696: *                 Bidiagonalize R in WORK(IR)
0697: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0698: *
0699:                   CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
0700:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0701:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0702: *
0703: *                 Generate left vectors bidiagonalizing R
0704: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0705: *
0706:                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0707:      $                         WORK( ITAUQ ), WORK( IWORK ),
0708:      $                         LWORK-IWORK+1, IERR )
0709:                   IWORK = IE + N
0710: *
0711: *                 Perform bidiagonal QR iteration, computing left
0712: *                 singular vectors of R in WORK(IR)
0713: *                 (Workspace: need N*N+BDSPAC)
0714: *
0715:                   CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
0716:      $                         WORK( IR ), LDWRKR, DUM, 1,
0717:      $                         WORK( IWORK ), INFO )
0718:                   IU = IE + N
0719: *
0720: *                 Multiply Q in A by left singular vectors of R in
0721: *                 WORK(IR), storing result in WORK(IU) and copying to A
0722: *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
0723: *
0724:                   DO 10 I = 1, M, LDWRKU
0725:                      CHUNK = MIN( M-I+1, LDWRKU )
0726:                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
0727:      $                           LDA, WORK( IR ), LDWRKR, ZERO,
0728:      $                           WORK( IU ), LDWRKU )
0729:                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
0730:      $                            A( I, 1 ), LDA )
0731:    10             CONTINUE
0732: *
0733:                ELSE
0734: *
0735: *                 Insufficient workspace for a fast algorithm
0736: *
0737:                   IE = 1
0738:                   ITAUQ = IE + N
0739:                   ITAUP = ITAUQ + N
0740:                   IWORK = ITAUP + N
0741: *
0742: *                 Bidiagonalize A
0743: *                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
0744: *
0745:                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
0746:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0747:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0748: *
0749: *                 Generate left vectors bidiagonalizing A
0750: *                 (Workspace: need 4*N, prefer 3*N+N*NB)
0751: *
0752:                   CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
0753:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0754:                   IWORK = IE + N
0755: *
0756: *                 Perform bidiagonal QR iteration, computing left
0757: *                 singular vectors of A in A
0758: *                 (Workspace: need BDSPAC)
0759: *
0760:                   CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
0761:      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
0762: *
0763:                END IF
0764: *
0765:             ELSE IF( WNTUO .AND. WNTVAS ) THEN
0766: *
0767: *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
0768: *              N left singular vectors to be overwritten on A and
0769: *              N right singular vectors to be computed in VT
0770: *
0771:                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0772: *
0773: *                 Sufficient workspace for a fast algorithm
0774: *
0775:                   IR = 1
0776:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
0777: *
0778: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
0779: *
0780:                      LDWRKU = LDA
0781:                      LDWRKR = LDA
0782:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
0783: *
0784: *                    WORK(IU) is LDA by N and WORK(IR) is N by N
0785: *
0786:                      LDWRKU = LDA
0787:                      LDWRKR = N
0788:                   ELSE
0789: *
0790: *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
0791: *
0792:                      LDWRKU = ( LWORK-N*N-N ) / N
0793:                      LDWRKR = N
0794:                   END IF
0795:                   ITAU = IR + LDWRKR*N
0796:                   IWORK = ITAU + N
0797: *
0798: *                 Compute A=Q*R
0799: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0800: *
0801:                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0802:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0803: *
0804: *                 Copy R to VT, zeroing out below it
0805: *
0806:                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
0807:                   IF( N.GT.1 )
0808:      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
0809:      $                            VT( 2, 1 ), LDVT )
0810: *
0811: *                 Generate Q in A
0812: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0813: *
0814:                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0815:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0816:                   IE = ITAU
0817:                   ITAUQ = IE + N
0818:                   ITAUP = ITAUQ + N
0819:                   IWORK = ITAUP + N
0820: *
0821: *                 Bidiagonalize R in VT, copying result to WORK(IR)
0822: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0823: *
0824:                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
0825:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0826:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0827:                   CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
0828: *
0829: *                 Generate left vectors bidiagonalizing R in WORK(IR)
0830: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0831: *
0832:                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0833:      $                         WORK( ITAUQ ), WORK( IWORK ),
0834:      $                         LWORK-IWORK+1, IERR )
0835: *
0836: *                 Generate right vectors bidiagonalizing R in VT
0837: *                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
0838: *
0839:                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
0840:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0841:                   IWORK = IE + N
0842: *
0843: *                 Perform bidiagonal QR iteration, computing left
0844: *                 singular vectors of R in WORK(IR) and computing right
0845: *                 singular vectors of R in VT
0846: *                 (Workspace: need N*N+BDSPAC)
0847: *
0848:                   CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
0849:      $                         WORK( IR ), LDWRKR, DUM, 1,
0850:      $                         WORK( IWORK ), INFO )
0851:                   IU = IE + N
0852: *
0853: *                 Multiply Q in A by left singular vectors of R in
0854: *                 WORK(IR), storing result in WORK(IU) and copying to A
0855: *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
0856: *
0857:                   DO 20 I = 1, M, LDWRKU
0858:                      CHUNK = MIN( M-I+1, LDWRKU )
0859:                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
0860:      $                           LDA, WORK( IR ), LDWRKR, ZERO,
0861:      $                           WORK( IU ), LDWRKU )
0862:                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
0863:      $                            A( I, 1 ), LDA )
0864:    20             CONTINUE
0865: *
0866:                ELSE
0867: *
0868: *                 Insufficient workspace for a fast algorithm
0869: *
0870:                   ITAU = 1
0871:                   IWORK = ITAU + N
0872: *
0873: *                 Compute A=Q*R
0874: *                 (Workspace: need 2*N, prefer N+N*NB)
0875: *
0876:                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0877:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0878: *
0879: *                 Copy R to VT, zeroing out below it
0880: *
0881:                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
0882:                   IF( N.GT.1 )
0883:      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
0884:      $                            VT( 2, 1 ), LDVT )
0885: *
0886: *                 Generate Q in A
0887: *                 (Workspace: need 2*N, prefer N+N*NB)
0888: *
0889:                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0890:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0891:                   IE = ITAU
0892:                   ITAUQ = IE + N
0893:                   ITAUP = ITAUQ + N
0894:                   IWORK = ITAUP + N
0895: *
0896: *                 Bidiagonalize R in VT
0897: *                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
0898: *
0899:                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
0900:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0901:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0902: *
0903: *                 Multiply Q in A by left vectors bidiagonalizing R
0904: *                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
0905: *
0906:                   CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
0907:      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
0908:      $                         LWORK-IWORK+1, IERR )
0909: *
0910: *                 Generate right vectors bidiagonalizing R in VT
0911: *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
0912: *
0913:                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
0914:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0915:                   IWORK = IE + N
0916: *
0917: *                 Perform bidiagonal QR iteration, computing left
0918: *                 singular vectors of A in A and computing right
0919: *                 singular vectors of A in VT
0920: *                 (Workspace: need BDSPAC)
0921: *
0922:                   CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
0923:      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
0924: *
0925:                END IF
0926: *
0927:             ELSE IF( WNTUS ) THEN
0928: *
0929:                IF( WNTVN ) THEN
0930: *
0931: *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
0932: *                 N left singular vectors to be computed in U and
0933: *                 no right singular vectors to be computed
0934: *
0935:                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0936: *
0937: *                    Sufficient workspace for a fast algorithm
0938: *
0939:                      IR = 1
0940:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
0941: *
0942: *                       WORK(IR) is LDA by N
0943: *
0944:                         LDWRKR = LDA
0945:                      ELSE
0946: *
0947: *                       WORK(IR) is N by N
0948: *
0949:                         LDWRKR = N
0950:                      END IF
0951:                      ITAU = IR + LDWRKR*N
0952:                      IWORK = ITAU + N
0953: *
0954: *                    Compute A=Q*R
0955: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0956: *
0957:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0958:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
0959: *
0960: *                    Copy R to WORK(IR), zeroing out below it
0961: *
0962:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
0963:      $                            LDWRKR )
0964:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
0965:      $                            WORK( IR+1 ), LDWRKR )
0966: *
0967: *                    Generate Q in A
0968: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0969: *
0970:                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0971:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
0972:                      IE = ITAU
0973:                      ITAUQ = IE + N
0974:                      ITAUP = ITAUQ + N
0975:                      IWORK = ITAUP + N
0976: *
0977: *                    Bidiagonalize R in WORK(IR)
0978: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0979: *
0980:                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
0981:      $                            WORK( IE ), WORK( ITAUQ ),
0982:      $                            WORK( ITAUP ), WORK( IWORK ),
0983:      $                            LWORK-IWORK+1, IERR )
0984: *
0985: *                    Generate left vectors bidiagonalizing R in WORK(IR)
0986: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0987: *
0988:                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0989:      $                            WORK( ITAUQ ), WORK( IWORK ),
0990:      $                            LWORK-IWORK+1, IERR )
0991:                      IWORK = IE + N
0992: *
0993: *                    Perform bidiagonal QR iteration, computing left
0994: *                    singular vectors of R in WORK(IR)
0995: *                    (Workspace: need N*N+BDSPAC)
0996: *
0997:                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
0998:      $                            1, WORK( IR ), LDWRKR, DUM, 1,
0999:      $                            WORK( IWORK ), INFO )
1000: *
1001: *                    Multiply Q in A by left singular vectors of R in
1002: *                    WORK(IR), storing result in U
1003: *                    (Workspace: need N*N)
1004: *
1005:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1006:      $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1007: *
1008:                   ELSE
1009: *
1010: *                    Insufficient workspace for a fast algorithm
1011: *
1012:                      ITAU = 1
1013:                      IWORK = ITAU + N
1014: *
1015: *                    Compute A=Q*R, copying result to U
1016: *                    (Workspace: need 2*N, prefer N+N*NB)
1017: *
1018:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1019:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1020:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1021: *
1022: *                    Generate Q in U
1023: *                    (Workspace: need 2*N, prefer N+N*NB)
1024: *
1025:                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1026:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1027:                      IE = ITAU
1028:                      ITAUQ = IE + N
1029:                      ITAUP = ITAUQ + N
1030:                      IWORK = ITAUP + N
1031: *
1032: *                    Zero out below R in A
1033: *
1034:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1035:      $                            LDA )
1036: *
1037: *                    Bidiagonalize R in A
1038: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1039: *
1040:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1041:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1042:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1043: *
1044: *                    Multiply Q in U by left vectors bidiagonalizing R
1045: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1046: *
1047:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1048:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1049:      $                            LWORK-IWORK+1, IERR )
1050:                      IWORK = IE + N
1051: *
1052: *                    Perform bidiagonal QR iteration, computing left
1053: *                    singular vectors of A in U
1054: *                    (Workspace: need BDSPAC)
1055: *
1056:                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1057:      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1058:      $                            INFO )
1059: *
1060:                   END IF
1061: *
1062:                ELSE IF( WNTVO ) THEN
1063: *
1064: *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1065: *                 N left singular vectors to be computed in U and
1066: *                 N right singular vectors to be overwritten on A
1067: *
1068:                   IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1069: *
1070: *                    Sufficient workspace for a fast algorithm
1071: *
1072:                      IU = 1
1073:                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1074: *
1075: *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1076: *
1077:                         LDWRKU = LDA
1078:                         IR = IU + LDWRKU*N
1079:                         LDWRKR = LDA
1080:                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1081: *
1082: *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1083: *
1084:                         LDWRKU = LDA
1085:                         IR = IU + LDWRKU*N
1086:                         LDWRKR = N
1087:                      ELSE
1088: *
1089: *                       WORK(IU) is N by N and WORK(IR) is N by N
1090: *
1091:                         LDWRKU = N
1092:                         IR = IU + LDWRKU*N
1093:                         LDWRKR = N
1094:                      END IF
1095:                      ITAU = IR + LDWRKR*N
1096:                      IWORK = ITAU + N
1097: *
1098: *                    Compute A=Q*R
1099: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1100: *
1101:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1102:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1103: *
1104: *                    Copy R to WORK(IU), zeroing out below it
1105: *
1106:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1107:      $                            LDWRKU )
1108:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1109:      $                            WORK( IU+1 ), LDWRKU )
1110: *
1111: *                    Generate Q in A
1112: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1113: *
1114:                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1115:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1116:                      IE = ITAU
1117:                      ITAUQ = IE + N
1118:                      ITAUP = ITAUQ + N
1119:                      IWORK = ITAUP + N
1120: *
1121: *                    Bidiagonalize R in WORK(IU), copying result to
1122: *                    WORK(IR)
1123: *                    (Workspace: need 2*N*N+4*N,
1124: *                                prefer 2*N*N+3*N+2*N*NB)
1125: *
1126:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1127:      $                            WORK( IE ), WORK( ITAUQ ),
1128:      $                            WORK( ITAUP ), WORK( IWORK ),
1129:      $                            LWORK-IWORK+1, IERR )
1130:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1131:      $                            WORK( IR ), LDWRKR )
1132: *
1133: *                    Generate left bidiagonalizing vectors in WORK(IU)
1134: *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1135: *
1136:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1137:      $                            WORK( ITAUQ ), WORK( IWORK ),
1138:      $                            LWORK-IWORK+1, IERR )
1139: *
1140: *                    Generate right bidiagonalizing vectors in WORK(IR)
1141: *                    (Workspace: need 2*N*N+4*N-1,
1142: *                                prefer 2*N*N+3*N+(N-1)*NB)
1143: *
1144:                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1145:      $                            WORK( ITAUP ), WORK( IWORK ),
1146:      $                            LWORK-IWORK+1, IERR )
1147:                      IWORK = IE + N
1148: *
1149: *                    Perform bidiagonal QR iteration, computing left
1150: *                    singular vectors of R in WORK(IU) and computing
1151: *                    right singular vectors of R in WORK(IR)
1152: *                    (Workspace: need 2*N*N+BDSPAC)
1153: *
1154:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1155:      $                            WORK( IR ), LDWRKR, WORK( IU ),
1156:      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1157: *
1158: *                    Multiply Q in A by left singular vectors of R in
1159: *                    WORK(IU), storing result in U
1160: *                    (Workspace: need N*N)
1161: *
1162:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1163:      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1164: *
1165: *                    Copy right singular vectors of R to A
1166: *                    (Workspace: need N*N)
1167: *
1168:                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1169:      $                            LDA )
1170: *
1171:                   ELSE
1172: *
1173: *                    Insufficient workspace for a fast algorithm
1174: *
1175:                      ITAU = 1
1176:                      IWORK = ITAU + N
1177: *
1178: *                    Compute A=Q*R, copying result to U
1179: *                    (Workspace: need 2*N, prefer N+N*NB)
1180: *
1181:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1182:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1183:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1184: *
1185: *                    Generate Q in U
1186: *                    (Workspace: need 2*N, prefer N+N*NB)
1187: *
1188:                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1189:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1190:                      IE = ITAU
1191:                      ITAUQ = IE + N
1192:                      ITAUP = ITAUQ + N
1193:                      IWORK = ITAUP + N
1194: *
1195: *                    Zero out below R in A
1196: *
1197:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1198:      $                            LDA )
1199: *
1200: *                    Bidiagonalize R in A
1201: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1202: *
1203:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1204:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1205:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1206: *
1207: *                    Multiply Q in U by left vectors bidiagonalizing R
1208: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1209: *
1210:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1211:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1212:      $                            LWORK-IWORK+1, IERR )
1213: *
1214: *                    Generate right vectors bidiagonalizing R in A
1215: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1216: *
1217:                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1218:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1219:                      IWORK = IE + N
1220: *
1221: *                    Perform bidiagonal QR iteration, computing left
1222: *                    singular vectors of A in U and computing right
1223: *                    singular vectors of A in A
1224: *                    (Workspace: need BDSPAC)
1225: *
1226:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1227:      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1228:      $                            INFO )
1229: *
1230:                   END IF
1231: *
1232:                ELSE IF( WNTVAS ) THEN
1233: *
1234: *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1235: *                         or 'A')
1236: *                 N left singular vectors to be computed in U and
1237: *                 N right singular vectors to be computed in VT
1238: *
1239:                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1240: *
1241: *                    Sufficient workspace for a fast algorithm
1242: *
1243:                      IU = 1
1244:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1245: *
1246: *                       WORK(IU) is LDA by N
1247: *
1248:                         LDWRKU = LDA
1249:                      ELSE
1250: *
1251: *                       WORK(IU) is N by N
1252: *
1253:                         LDWRKU = N
1254:                      END IF
1255:                      ITAU = IU + LDWRKU*N
1256:                      IWORK = ITAU + N
1257: *
1258: *                    Compute A=Q*R
1259: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1260: *
1261:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1262:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1263: *
1264: *                    Copy R to WORK(IU), zeroing out below it
1265: *
1266:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1267:      $                            LDWRKU )
1268:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1269:      $                            WORK( IU+1 ), LDWRKU )
1270: *
1271: *                    Generate Q in A
1272: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1273: *
1274:                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1275:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1276:                      IE = ITAU
1277:                      ITAUQ = IE + N
1278:                      ITAUP = ITAUQ + N
1279:                      IWORK = ITAUP + N
1280: *
1281: *                    Bidiagonalize R in WORK(IU), copying result to VT
1282: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1283: *
1284:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1285:      $                            WORK( IE ), WORK( ITAUQ ),
1286:      $                            WORK( ITAUP ), WORK( IWORK ),
1287:      $                            LWORK-IWORK+1, IERR )
1288:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1289:      $                            LDVT )
1290: *
1291: *                    Generate left bidiagonalizing vectors in WORK(IU)
1292: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1293: *
1294:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1295:      $                            WORK( ITAUQ ), WORK( IWORK ),
1296:      $                            LWORK-IWORK+1, IERR )
1297: *
1298: *                    Generate right bidiagonalizing vectors in VT
1299: *                    (Workspace: need N*N+4*N-1,
1300: *                                prefer N*N+3*N+(N-1)*NB)
1301: *
1302:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1303:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1304:                      IWORK = IE + N
1305: *
1306: *                    Perform bidiagonal QR iteration, computing left
1307: *                    singular vectors of R in WORK(IU) and computing
1308: *                    right singular vectors of R in VT
1309: *                    (Workspace: need N*N+BDSPAC)
1310: *
1311:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1312:      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1313:      $                            WORK( IWORK ), INFO )
1314: *
1315: *                    Multiply Q in A by left singular vectors of R in
1316: *                    WORK(IU), storing result in U
1317: *                    (Workspace: need N*N)
1318: *
1319:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1320:      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1321: *
1322:                   ELSE
1323: *
1324: *                    Insufficient workspace for a fast algorithm
1325: *
1326:                      ITAU = 1
1327:                      IWORK = ITAU + N
1328: *
1329: *                    Compute A=Q*R, copying result to U
1330: *                    (Workspace: need 2*N, prefer N+N*NB)
1331: *
1332:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1333:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1334:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1335: *
1336: *                    Generate Q in U
1337: *                    (Workspace: need 2*N, prefer N+N*NB)
1338: *
1339:                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1340:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1341: *
1342: *                    Copy R to VT, zeroing out below it
1343: *
1344:                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1345:                      IF( N.GT.1 )
1346:      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1347:      $                               VT( 2, 1 ), LDVT )
1348:                      IE = ITAU
1349:                      ITAUQ = IE + N
1350:                      ITAUP = ITAUQ + N
1351:                      IWORK = ITAUP + N
1352: *
1353: *                    Bidiagonalize R in VT
1354: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1355: *
1356:                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1357:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1358:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1359: *
1360: *                    Multiply Q in U by left bidiagonalizing vectors
1361: *                    in VT
1362: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1363: *
1364:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1365:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1366:      $                            LWORK-IWORK+1, IERR )
1367: *
1368: *                    Generate right bidiagonalizing vectors in VT
1369: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1370: *
1371:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1372:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1373:                      IWORK = IE + N
1374: *
1375: *                    Perform bidiagonal QR iteration, computing left
1376: *                    singular vectors of A in U and computing right
1377: *                    singular vectors of A in VT
1378: *                    (Workspace: need BDSPAC)
1379: *
1380:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1381:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1382:      $                            INFO )
1383: *
1384:                   END IF
1385: *
1386:                END IF
1387: *
1388:             ELSE IF( WNTUA ) THEN
1389: *
1390:                IF( WNTVN ) THEN
1391: *
1392: *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1393: *                 M left singular vectors to be computed in U and
1394: *                 no right singular vectors to be computed
1395: *
1396:                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1397: *
1398: *                    Sufficient workspace for a fast algorithm
1399: *
1400:                      IR = 1
1401:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1402: *
1403: *                       WORK(IR) is LDA by N
1404: *
1405:                         LDWRKR = LDA
1406:                      ELSE
1407: *
1408: *                       WORK(IR) is N by N
1409: *
1410:                         LDWRKR = N
1411:                      END IF
1412:                      ITAU = IR + LDWRKR*N
1413:                      IWORK = ITAU + N
1414: *
1415: *                    Compute A=Q*R, copying result to U
1416: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1417: *
1418:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1419:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1420:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1421: *
1422: *                    Copy R to WORK(IR), zeroing out below it
1423: *
1424:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
1425:      $                            LDWRKR )
1426:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1427:      $                            WORK( IR+1 ), LDWRKR )
1428: *
1429: *                    Generate Q in U
1430: *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1431: *
1432:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1433:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1434:                      IE = ITAU
1435:                      ITAUQ = IE + N
1436:                      ITAUP = ITAUQ + N
1437:                      IWORK = ITAUP + N
1438: *
1439: *                    Bidiagonalize R in WORK(IR)
1440: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1441: *
1442:                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
1443:      $                            WORK( IE ), WORK( ITAUQ ),
1444:      $                            WORK( ITAUP ), WORK( IWORK ),
1445:      $                            LWORK-IWORK+1, IERR )
1446: *
1447: *                    Generate left bidiagonalizing vectors in WORK(IR)
1448: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1449: *
1450:                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1451:      $                            WORK( ITAUQ ), WORK( IWORK ),
1452:      $                            LWORK-IWORK+1, IERR )
1453:                      IWORK = IE + N
1454: *
1455: *                    Perform bidiagonal QR iteration, computing left
1456: *                    singular vectors of R in WORK(IR)
1457: *                    (Workspace: need N*N+BDSPAC)
1458: *
1459:                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1460:      $                            1, WORK( IR ), LDWRKR, DUM, 1,
1461:      $                            WORK( IWORK ), INFO )
1462: *
1463: *                    Multiply Q in U by left singular vectors of R in
1464: *                    WORK(IR), storing result in A
1465: *                    (Workspace: need N*N)
1466: *
1467:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1468:      $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1469: *
1470: *                    Copy left singular vectors of A from A to U
1471: *
1472:                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1473: *
1474:                   ELSE
1475: *
1476: *                    Insufficient workspace for a fast algorithm
1477: *
1478:                      ITAU = 1
1479:                      IWORK = ITAU + N
1480: *
1481: *                    Compute A=Q*R, copying result to U
1482: *                    (Workspace: need 2*N, prefer N+N*NB)
1483: *
1484:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1485:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1486:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1487: *
1488: *                    Generate Q in U
1489: *                    (Workspace: need N+M, prefer N+M*NB)
1490: *
1491:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1492:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1493:                      IE = ITAU
1494:                      ITAUQ = IE + N
1495:                      ITAUP = ITAUQ + N
1496:                      IWORK = ITAUP + N
1497: *
1498: *                    Zero out below R in A
1499: *
1500:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1501:      $                            LDA )
1502: *
1503: *                    Bidiagonalize R in A
1504: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1505: *
1506:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1507:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1508:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1509: *
1510: *                    Multiply Q in U by left bidiagonalizing vectors
1511: *                    in A
1512: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1513: *
1514:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1515:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1516:      $                            LWORK-IWORK+1, IERR )
1517:                      IWORK = IE + N
1518: *
1519: *                    Perform bidiagonal QR iteration, computing left
1520: *                    singular vectors of A in U
1521: *                    (Workspace: need BDSPAC)
1522: *
1523:                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1524:      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1525:      $                            INFO )
1526: *
1527:                   END IF
1528: *
1529:                ELSE IF( WNTVO ) THEN
1530: *
1531: *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1532: *                 M left singular vectors to be computed in U and
1533: *                 N right singular vectors to be overwritten on A
1534: *
1535:                   IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1536: *
1537: *                    Sufficient workspace for a fast algorithm
1538: *
1539:                      IU = 1
1540:                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1541: *
1542: *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1543: *
1544:                         LDWRKU = LDA
1545:                         IR = IU + LDWRKU*N
1546:                         LDWRKR = LDA
1547:                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1548: *
1549: *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1550: *
1551:                         LDWRKU = LDA
1552:                         IR = IU + LDWRKU*N
1553:                         LDWRKR = N
1554:                      ELSE
1555: *
1556: *                       WORK(IU) is N by N and WORK(IR) is N by N
1557: *
1558:                         LDWRKU = N
1559:                         IR = IU + LDWRKU*N
1560:                         LDWRKR = N
1561:                      END IF
1562:                      ITAU = IR + LDWRKR*N
1563:                      IWORK = ITAU + N
1564: *
1565: *                    Compute A=Q*R, copying result to U
1566: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1567: *
1568:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1569:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1570:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1571: *
1572: *                    Generate Q in U
1573: *                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1574: *
1575:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1576:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1577: *
1578: *                    Copy R to WORK(IU), zeroing out below it
1579: *
1580:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1581:      $                            LDWRKU )
1582:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1583:      $                            WORK( IU+1 ), LDWRKU )
1584:                      IE = ITAU
1585:                      ITAUQ = IE + N
1586:                      ITAUP = ITAUQ + N
1587:                      IWORK = ITAUP + N
1588: *
1589: *                    Bidiagonalize R in WORK(IU), copying result to
1590: *                    WORK(IR)
1591: *                    (Workspace: need 2*N*N+4*N,
1592: *                                prefer 2*N*N+3*N+2*N*NB)
1593: *
1594:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1595:      $                            WORK( IE ), WORK( ITAUQ ),
1596:      $                            WORK( ITAUP ), WORK( IWORK ),
1597:      $                            LWORK-IWORK+1, IERR )
1598:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1599:      $                            WORK( IR ), LDWRKR )
1600: *
1601: *                    Generate left bidiagonalizing vectors in WORK(IU)
1602: *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1603: *
1604:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1605:      $                            WORK( ITAUQ ), WORK( IWORK ),
1606:      $                            LWORK-IWORK+1, IERR )
1607: *
1608: *                    Generate right bidiagonalizing vectors in WORK(IR)
1609: *                    (Workspace: need 2*N*N+4*N-1,
1610: *                                prefer 2*N*N+3*N+(N-1)*NB)
1611: *
1612:                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1613:      $                            WORK( ITAUP ), WORK( IWORK ),
1614:      $                            LWORK-IWORK+1, IERR )
1615:                      IWORK = IE + N
1616: *
1617: *                    Perform bidiagonal QR iteration, computing left
1618: *                    singular vectors of R in WORK(IU) and computing
1619: *                    right singular vectors of R in WORK(IR)
1620: *                    (Workspace: need 2*N*N+BDSPAC)
1621: *
1622:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1623:      $                            WORK( IR ), LDWRKR, WORK( IU ),
1624:      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1625: *
1626: *                    Multiply Q in U by left singular vectors of R in
1627: *                    WORK(IU), storing result in A
1628: *                    (Workspace: need N*N)
1629: *
1630:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1631:      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1632: *
1633: *                    Copy left singular vectors of A from A to U
1634: *
1635:                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1636: *
1637: *                    Copy right singular vectors of R from WORK(IR) to A
1638: *
1639:                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1640:      $                            LDA )
1641: *
1642:                   ELSE
1643: *
1644: *                    Insufficient workspace for a fast algorithm
1645: *
1646:                      ITAU = 1
1647:                      IWORK = ITAU + N
1648: *
1649: *                    Compute A=Q*R, copying result to U
1650: *                    (Workspace: need 2*N, prefer N+N*NB)
1651: *
1652:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1653:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1654:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1655: *
1656: *                    Generate Q in U
1657: *                    (Workspace: need N+M, prefer N+M*NB)
1658: *
1659:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1660:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1661:                      IE = ITAU
1662:                      ITAUQ = IE + N
1663:                      ITAUP = ITAUQ + N
1664:                      IWORK = ITAUP + N
1665: *
1666: *                    Zero out below R in A
1667: *
1668:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1669:      $                            LDA )
1670: *
1671: *                    Bidiagonalize R in A
1672: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1673: *
1674:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1675:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1676:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1677: *
1678: *                    Multiply Q in U by left bidiagonalizing vectors
1679: *                    in A
1680: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1681: *
1682:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1683:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1684:      $                            LWORK-IWORK+1, IERR )
1685: *
1686: *                    Generate right bidiagonalizing vectors in A
1687: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1688: *
1689:                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1690:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1691:                      IWORK = IE + N
1692: *
1693: *                    Perform bidiagonal QR iteration, computing left
1694: *                    singular vectors of A in U and computing right
1695: *                    singular vectors of A in A
1696: *                    (Workspace: need BDSPAC)
1697: *
1698:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1699:      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1700:      $                            INFO )
1701: *
1702:                   END IF
1703: *
1704:                ELSE IF( WNTVAS ) THEN
1705: *
1706: *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1707: *                         or 'A')
1708: *                 M left singular vectors to be computed in U and
1709: *                 N right singular vectors to be computed in VT
1710: *
1711:                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1712: *
1713: *                    Sufficient workspace for a fast algorithm
1714: *
1715:                      IU = 1
1716:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1717: *
1718: *                       WORK(IU) is LDA by N
1719: *
1720:                         LDWRKU = LDA
1721:                      ELSE
1722: *
1723: *                       WORK(IU) is N by N
1724: *
1725:                         LDWRKU = N
1726:                      END IF
1727:                      ITAU = IU + LDWRKU*N
1728:                      IWORK = ITAU + N
1729: *
1730: *                    Compute A=Q*R, copying result to U
1731: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1732: *
1733:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1734:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1735:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1736: *
1737: *                    Generate Q in U
1738: *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1739: *
1740:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1741:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1742: *
1743: *                    Copy R to WORK(IU), zeroing out below it
1744: *
1745:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1746:      $                            LDWRKU )
1747:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1748:      $                            WORK( IU+1 ), LDWRKU )
1749:                      IE = ITAU
1750:                      ITAUQ = IE + N
1751:                      ITAUP = ITAUQ + N
1752:                      IWORK = ITAUP + N
1753: *
1754: *                    Bidiagonalize R in WORK(IU), copying result to VT
1755: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1756: *
1757:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1758:      $                            WORK( IE ), WORK( ITAUQ ),
1759:      $                            WORK( ITAUP ), WORK( IWORK ),
1760:      $                            LWORK-IWORK+1, IERR )
1761:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1762:      $                            LDVT )
1763: *
1764: *                    Generate left bidiagonalizing vectors in WORK(IU)
1765: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1766: *
1767:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1768:      $                            WORK( ITAUQ ), WORK( IWORK ),
1769:      $                            LWORK-IWORK+1, IERR )
1770: *
1771: *                    Generate right bidiagonalizing vectors in VT
1772: *                    (Workspace: need N*N+4*N-1,
1773: *                                prefer N*N+3*N+(N-1)*NB)
1774: *
1775:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1776:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1777:                      IWORK = IE + N
1778: *
1779: *                    Perform bidiagonal QR iteration, computing left
1780: *                    singular vectors of R in WORK(IU) and computing
1781: *                    right singular vectors of R in VT
1782: *                    (Workspace: need N*N+BDSPAC)
1783: *
1784:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1785:      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1786:      $                            WORK( IWORK ), INFO )
1787: *
1788: *                    Multiply Q in U by left singular vectors of R in
1789: *                    WORK(IU), storing result in A
1790: *                    (Workspace: need N*N)
1791: *
1792:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1793:      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1794: *
1795: *                    Copy left singular vectors of A from A to U
1796: *
1797:                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1798: *
1799:                   ELSE
1800: *
1801: *                    Insufficient workspace for a fast algorithm
1802: *
1803:                      ITAU = 1
1804:                      IWORK = ITAU + N
1805: *
1806: *                    Compute A=Q*R, copying result to U
1807: *                    (Workspace: need 2*N, prefer N+N*NB)
1808: *
1809:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1810:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1811:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1812: *
1813: *                    Generate Q in U
1814: *                    (Workspace: need N+M, prefer N+M*NB)
1815: *
1816:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1817:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1818: *
1819: *                    Copy R from A to VT, zeroing out below it
1820: *
1821:                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1822:                      IF( N.GT.1 )
1823:      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1824:      $                               VT( 2, 1 ), LDVT )
1825:                      IE = ITAU
1826:                      ITAUQ = IE + N
1827:                      ITAUP = ITAUQ + N
1828:                      IWORK = ITAUP + N
1829: *
1830: *                    Bidiagonalize R in VT
1831: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1832: *
1833:                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1834:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1835:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1836: *
1837: *                    Multiply Q in U by left bidiagonalizing vectors
1838: *                    in VT
1839: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1840: *
1841:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1842:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1843:      $                            LWORK-IWORK+1, IERR )
1844: *
1845: *                    Generate right bidiagonalizing vectors in VT
1846: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1847: *
1848:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1849:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1850:                      IWORK = IE + N
1851: *
1852: *                    Perform bidiagonal QR iteration, computing left
1853: *                    singular vectors of A in U and computing right
1854: *                    singular vectors of A in VT
1855: *                    (Workspace: need BDSPAC)
1856: *
1857:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1858:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1859:      $                            INFO )
1860: *
1861:                   END IF
1862: *
1863:                END IF
1864: *
1865:             END IF
1866: *
1867:          ELSE
1868: *
1869: *           M .LT. MNTHR
1870: *
1871: *           Path 10 (M at least N, but not much larger)
1872: *           Reduce to bidiagonal form without QR decomposition
1873: *
1874:             IE = 1
1875:             ITAUQ = IE + N
1876:             ITAUP = ITAUQ + N
1877:             IWORK = ITAUP + N
1878: *
1879: *           Bidiagonalize A
1880: *           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1881: *
1882:             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1883:      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1884:      $                   IERR )
1885:             IF( WNTUAS ) THEN
1886: *
1887: *              If left singular vectors desired in U, copy result to U
1888: *              and generate left bidiagonalizing vectors in U
1889: *              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1890: *
1891:                CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1892:                IF( WNTUS )
1893:      $            NCU = N
1894:                IF( WNTUA )
1895:      $            NCU = M
1896:                CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1897:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1898:             END IF
1899:             IF( WNTVAS ) THEN
1900: *
1901: *              If right singular vectors desired in VT, copy result to
1902: *              VT and generate right bidiagonalizing vectors in VT
1903: *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1904: *
1905:                CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1906:                CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1907:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1908:             END IF
1909:             IF( WNTUO ) THEN
1910: *
1911: *              If left singular vectors desired in A, generate left
1912: *              bidiagonalizing vectors in A
1913: *              (Workspace: need 4*N, prefer 3*N+N*NB)
1914: *
1915:                CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1916:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1917:             END IF
1918:             IF( WNTVO ) THEN
1919: *
1920: *              If right singular vectors desired in A, generate right
1921: *              bidiagonalizing vectors in A
1922: *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1923: *
1924:                CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1925:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1926:             END IF
1927:             IWORK = IE + N
1928:             IF( WNTUAS .OR. WNTUO )
1929:      $         NRU = M
1930:             IF( WNTUN )
1931:      $         NRU = 0
1932:             IF( WNTVAS .OR. WNTVO )
1933:      $         NCVT = N
1934:             IF( WNTVN )
1935:      $         NCVT = 0
1936:             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1937: *
1938: *              Perform bidiagonal QR iteration, if desired, computing
1939: *              left singular vectors in U and computing right singular
1940: *              vectors in VT
1941: *              (Workspace: need BDSPAC)
1942: *
1943:                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1944:      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1945:             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1946: *
1947: *              Perform bidiagonal QR iteration, if desired, computing
1948: *              left singular vectors in U and computing right singular
1949: *              vectors in A
1950: *              (Workspace: need BDSPAC)
1951: *
1952:                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1953:      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
1954:             ELSE
1955: *
1956: *              Perform bidiagonal QR iteration, if desired, computing
1957: *              left singular vectors in A and computing right singular
1958: *              vectors in VT
1959: *              (Workspace: need BDSPAC)
1960: *
1961:                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1962:      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1963:             END IF
1964: *
1965:          END IF
1966: *
1967:       ELSE
1968: *
1969: *        A has more columns than rows. If A has sufficiently more
1970: *        columns than rows, first reduce using the LQ decomposition (if
1971: *        sufficient workspace available)
1972: *
1973:          IF( N.GE.MNTHR ) THEN
1974: *
1975:             IF( WNTVN ) THEN
1976: *
1977: *              Path 1t(N much larger than M, JOBVT='N')
1978: *              No right singular vectors to be computed
1979: *
1980:                ITAU = 1
1981:                IWORK = ITAU + M
1982: *
1983: *              Compute A=L*Q
1984: *              (Workspace: need 2*M, prefer M+M*NB)
1985: *
1986:                CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1987:      $                      LWORK-IWORK+1, IERR )
1988: *
1989: *              Zero out above L
1990: *
1991:                CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1992:                IE = 1
1993:                ITAUQ = IE + M
1994:                ITAUP = ITAUQ + M
1995:                IWORK = ITAUP + M
1996: *
1997: *              Bidiagonalize L in A
1998: *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
1999: *
2000:                CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2001:      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2002:      $                      IERR )
2003:                IF( WNTUO .OR. WNTUAS ) THEN
2004: *
2005: *                 If left singular vectors desired, generate Q
2006: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2007: *
2008:                   CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2009:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2010:                END IF
2011:                IWORK = IE + M
2012:                NRU = 0
2013:                IF( WNTUO .OR. WNTUAS )
2014:      $            NRU = M
2015: *
2016: *              Perform bidiagonal QR iteration, computing left singular
2017: *              vectors of A in A if desired
2018: *              (Workspace: need BDSPAC)
2019: *
2020:                CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2021:      $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2022: *
2023: *              If left singular vectors desired in U, copy them there
2024: *
2025:                IF( WNTUAS )
2026:      $            CALL SLACPY( 'F', M, M, A, LDA, U, LDU )
2027: *
2028:             ELSE IF( WNTVO .AND. WNTUN ) THEN
2029: *
2030: *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2031: *              M right singular vectors to be overwritten on A and
2032: *              no left singular vectors to be computed
2033: *
2034:                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2035: *
2036: *                 Sufficient workspace for a fast algorithm
2037: *
2038:                   IR = 1
2039:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2040: *
2041: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2042: *
2043:                      LDWRKU = LDA
2044:                      CHUNK = N
2045:                      LDWRKR = LDA
2046:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2047: *
2048: *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2049: *
2050:                      LDWRKU = LDA
2051:                      CHUNK = N
2052:                      LDWRKR = M
2053:                   ELSE
2054: *
2055: *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2056: *
2057:                      LDWRKU = M
2058:                      CHUNK = ( LWORK-M*M-M ) / M
2059:                      LDWRKR = M
2060:                   END IF
2061:                   ITAU = IR + LDWRKR*M
2062:                   IWORK = ITAU + M
2063: *
2064: *                 Compute A=L*Q
2065: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2066: *
2067:                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2068:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2069: *
2070: *                 Copy L to WORK(IR) and zero out above it
2071: *
2072:                   CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2073:                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2074:      $                         WORK( IR+LDWRKR ), LDWRKR )
2075: *
2076: *                 Generate Q in A
2077: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2078: *
2079:                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2080:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2081:                   IE = ITAU
2082:                   ITAUQ = IE + M
2083:                   ITAUP = ITAUQ + M
2084:                   IWORK = ITAUP + M
2085: *
2086: *                 Bidiagonalize L in WORK(IR)
2087: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2088: *
2089:                   CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2090:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2091:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2092: *
2093: *                 Generate right vectors bidiagonalizing L
2094: *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2095: *
2096:                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2097:      $                         WORK( ITAUP ), WORK( IWORK ),
2098:      $                         LWORK-IWORK+1, IERR )
2099:                   IWORK = IE + M
2100: *
2101: *                 Perform bidiagonal QR iteration, computing right
2102: *                 singular vectors of L in WORK(IR)
2103: *                 (Workspace: need M*M+BDSPAC)
2104: *
2105:                   CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2106:      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2107:      $                         WORK( IWORK ), INFO )
2108:                   IU = IE + M
2109: *
2110: *                 Multiply right singular vectors of L in WORK(IR) by Q
2111: *                 in A, storing result in WORK(IU) and copying to A
2112: *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2113: *
2114:                   DO 30 I = 1, N, CHUNK
2115:                      BLK = MIN( N-I+1, CHUNK )
2116:                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2117:      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2118:      $                           WORK( IU ), LDWRKU )
2119:                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2120:      $                            A( 1, I ), LDA )
2121:    30             CONTINUE
2122: *
2123:                ELSE
2124: *
2125: *                 Insufficient workspace for a fast algorithm
2126: *
2127:                   IE = 1
2128:                   ITAUQ = IE + M
2129:                   ITAUP = ITAUQ + M
2130:                   IWORK = ITAUP + M
2131: *
2132: *                 Bidiagonalize A
2133: *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2134: *
2135:                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
2136:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2137:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2138: *
2139: *                 Generate right vectors bidiagonalizing A
2140: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2141: *
2142:                   CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2143:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2144:                   IWORK = IE + M
2145: *
2146: *                 Perform bidiagonal QR iteration, computing right
2147: *                 singular vectors of A in A
2148: *                 (Workspace: need BDSPAC)
2149: *
2150:                   CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2151:      $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2152: *
2153:                END IF
2154: *
2155:             ELSE IF( WNTVO .AND. WNTUAS ) THEN
2156: *
2157: *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2158: *              M right singular vectors to be overwritten on A and
2159: *              M left singular vectors to be computed in U
2160: *
2161:                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2162: *
2163: *                 Sufficient workspace for a fast algorithm
2164: *
2165:                   IR = 1
2166:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2167: *
2168: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2169: *
2170:                      LDWRKU = LDA
2171:                      CHUNK = N
2172:                      LDWRKR = LDA
2173:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2174: *
2175: *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2176: *
2177:                      LDWRKU = LDA
2178:                      CHUNK = N
2179:                      LDWRKR = M
2180:                   ELSE
2181: *
2182: *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2183: *
2184:                      LDWRKU = M
2185:                      CHUNK = ( LWORK-M*M-M ) / M
2186:                      LDWRKR = M
2187:                   END IF
2188:                   ITAU = IR + LDWRKR*M
2189:                   IWORK = ITAU + M
2190: *
2191: *                 Compute A=L*Q
2192: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2193: *
2194:                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2195:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2196: *
2197: *                 Copy L to U, zeroing about above it
2198: *
2199:                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2200:                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2201:      $                         LDU )
2202: *
2203: *                 Generate Q in A
2204: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2205: *
2206:                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2207:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2208:                   IE = ITAU
2209:                   ITAUQ = IE + M
2210:                   ITAUP = ITAUQ + M
2211:                   IWORK = ITAUP + M
2212: *
2213: *                 Bidiagonalize L in U, copying result to WORK(IR)
2214: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2215: *
2216:                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2217:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2218:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2219:                   CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2220: *
2221: *                 Generate right vectors bidiagonalizing L in WORK(IR)
2222: *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2223: *
2224:                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2225:      $                         WORK( ITAUP ), WORK( IWORK ),
2226:      $                         LWORK-IWORK+1, IERR )
2227: *
2228: *                 Generate left vectors bidiagonalizing L in U
2229: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2230: *
2231:                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2232:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2233:                   IWORK = IE + M
2234: *
2235: *                 Perform bidiagonal QR iteration, computing left
2236: *                 singular vectors of L in U, and computing right
2237: *                 singular vectors of L in WORK(IR)
2238: *                 (Workspace: need M*M+BDSPAC)
2239: *
2240:                   CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2241:      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2242:      $                         WORK( IWORK ), INFO )
2243:                   IU = IE + M
2244: *
2245: *                 Multiply right singular vectors of L in WORK(IR) by Q
2246: *                 in A, storing result in WORK(IU) and copying to A
2247: *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2248: *
2249:                   DO 40 I = 1, N, CHUNK
2250:                      BLK = MIN( N-I+1, CHUNK )
2251:                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2252:      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2253:      $                           WORK( IU ), LDWRKU )
2254:                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2255:      $                            A( 1, I ), LDA )
2256:    40             CONTINUE
2257: *
2258:                ELSE
2259: *
2260: *                 Insufficient workspace for a fast algorithm
2261: *
2262:                   ITAU = 1
2263:                   IWORK = ITAU + M
2264: *
2265: *                 Compute A=L*Q
2266: *                 (Workspace: need 2*M, prefer M+M*NB)
2267: *
2268:                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2269:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2270: *
2271: *                 Copy L to U, zeroing out above it
2272: *
2273:                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2274:                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2275:      $                         LDU )
2276: *
2277: *                 Generate Q in A
2278: *                 (Workspace: need 2*M, prefer M+M*NB)
2279: *
2280:                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2281:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2282:                   IE = ITAU
2283:                   ITAUQ = IE + M
2284:                   ITAUP = ITAUQ + M
2285:                   IWORK = ITAUP + M
2286: *
2287: *                 Bidiagonalize L in U
2288: *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2289: *
2290:                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2291:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2292:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2293: *
2294: *                 Multiply right vectors bidiagonalizing L by Q in A
2295: *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2296: *
2297:                   CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2298:      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2299:      $                         LWORK-IWORK+1, IERR )
2300: *
2301: *                 Generate left vectors bidiagonalizing L in U
2302: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2303: *
2304:                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2305:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2306:                   IWORK = IE + M
2307: *
2308: *                 Perform bidiagonal QR iteration, computing left
2309: *                 singular vectors of A in U and computing right
2310: *                 singular vectors of A in A
2311: *                 (Workspace: need BDSPAC)
2312: *
2313:                   CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2314:      $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2315: *
2316:                END IF
2317: *
2318:             ELSE IF( WNTVS ) THEN
2319: *
2320:                IF( WNTUN ) THEN
2321: *
2322: *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2323: *                 M right singular vectors to be computed in VT and
2324: *                 no left singular vectors to be computed
2325: *
2326:                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2327: *
2328: *                    Sufficient workspace for a fast algorithm
2329: *
2330:                      IR = 1
2331:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2332: *
2333: *                       WORK(IR) is LDA by M
2334: *
2335:                         LDWRKR = LDA
2336:                      ELSE
2337: *
2338: *                       WORK(IR) is M by M
2339: *
2340:                         LDWRKR = M
2341:                      END IF
2342:                      ITAU = IR + LDWRKR*M
2343:                      IWORK = ITAU + M
2344: *
2345: *                    Compute A=L*Q
2346: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2347: *
2348:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2349:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2350: *
2351: *                    Copy L to WORK(IR), zeroing out above it
2352: *
2353:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
2354:      $                            LDWRKR )
2355:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2356:      $                            WORK( IR+LDWRKR ), LDWRKR )
2357: *
2358: *                    Generate Q in A
2359: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2360: *
2361:                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2362:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2363:                      IE = ITAU
2364:                      ITAUQ = IE + M
2365:                      ITAUP = ITAUQ + M
2366:                      IWORK = ITAUP + M
2367: *
2368: *                    Bidiagonalize L in WORK(IR)
2369: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2370: *
2371:                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
2372:      $                            WORK( IE ), WORK( ITAUQ ),
2373:      $                            WORK( ITAUP ), WORK( IWORK ),
2374:      $                            LWORK-IWORK+1, IERR )
2375: *
2376: *                    Generate right vectors bidiagonalizing L in
2377: *                    WORK(IR)
2378: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2379: *
2380:                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2381:      $                            WORK( ITAUP ), WORK( IWORK ),
2382:      $                            LWORK-IWORK+1, IERR )
2383:                      IWORK = IE + M
2384: *
2385: *                    Perform bidiagonal QR iteration, computing right
2386: *                    singular vectors of L in WORK(IR)
2387: *                    (Workspace: need M*M+BDSPAC)
2388: *
2389:                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2390:      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2391:      $                            WORK( IWORK ), INFO )
2392: *
2393: *                    Multiply right singular vectors of L in WORK(IR) by
2394: *                    Q in A, storing result in VT
2395: *                    (Workspace: need M*M)
2396: *
2397:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2398:      $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2399: *
2400:                   ELSE
2401: *
2402: *                    Insufficient workspace for a fast algorithm
2403: *
2404:                      ITAU = 1
2405:                      IWORK = ITAU + M
2406: *
2407: *                    Compute A=L*Q
2408: *                    (Workspace: need 2*M, prefer M+M*NB)
2409: *
2410:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2411:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2412: *
2413: *                    Copy result to VT
2414: *
2415:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2416: *
2417: *                    Generate Q in VT
2418: *                    (Workspace: need 2*M, prefer M+M*NB)
2419: *
2420:                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2421:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2422:                      IE = ITAU
2423:                      ITAUQ = IE + M
2424:                      ITAUP = ITAUQ + M
2425:                      IWORK = ITAUP + M
2426: *
2427: *                    Zero out above L in A
2428: *
2429:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2430:      $                            LDA )
2431: *
2432: *                    Bidiagonalize L in A
2433: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2434: *
2435:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2436:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2437:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2438: *
2439: *                    Multiply right vectors bidiagonalizing L by Q in VT
2440: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2441: *
2442:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2443:      $                            WORK( ITAUP ), VT, LDVT,
2444:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2445:                      IWORK = IE + M
2446: *
2447: *                    Perform bidiagonal QR iteration, computing right
2448: *                    singular vectors of A in VT
2449: *                    (Workspace: need BDSPAC)
2450: *
2451:                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2452:      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2453:      $                            INFO )
2454: *
2455:                   END IF
2456: *
2457:                ELSE IF( WNTUO ) THEN
2458: *
2459: *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2460: *                 M right singular vectors to be computed in VT and
2461: *                 M left singular vectors to be overwritten on A
2462: *
2463:                   IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2464: *
2465: *                    Sufficient workspace for a fast algorithm
2466: *
2467:                      IU = 1
2468:                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2469: *
2470: *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2471: *
2472:                         LDWRKU = LDA
2473:                         IR = IU + LDWRKU*M
2474:                         LDWRKR = LDA
2475:                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2476: *
2477: *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2478: *
2479:                         LDWRKU = LDA
2480:                         IR = IU + LDWRKU*M
2481:                         LDWRKR = M
2482:                      ELSE
2483: *
2484: *                       WORK(IU) is M by M and WORK(IR) is M by M
2485: *
2486:                         LDWRKU = M
2487:                         IR = IU + LDWRKU*M
2488:                         LDWRKR = M
2489:                      END IF
2490:                      ITAU = IR + LDWRKR*M
2491:                      IWORK = ITAU + M
2492: *
2493: *                    Compute A=L*Q
2494: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2495: *
2496:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2497:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2498: *
2499: *                    Copy L to WORK(IU), zeroing out below it
2500: *
2501:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2502:      $                            LDWRKU )
2503:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2504:      $                            WORK( IU+LDWRKU ), LDWRKU )
2505: *
2506: *                    Generate Q in A
2507: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2508: *
2509:                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2510:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2511:                      IE = ITAU
2512:                      ITAUQ = IE + M
2513:                      ITAUP = ITAUQ + M
2514:                      IWORK = ITAUP + M
2515: *
2516: *                    Bidiagonalize L in WORK(IU), copying result to
2517: *                    WORK(IR)
2518: *                    (Workspace: need 2*M*M+4*M,
2519: *                                prefer 2*M*M+3*M+2*M*NB)
2520: *
2521:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2522:      $                            WORK( IE ), WORK( ITAUQ ),
2523:      $                            WORK( ITAUP ), WORK( IWORK ),
2524:      $                            LWORK-IWORK+1, IERR )
2525:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2526:      $                            WORK( IR ), LDWRKR )
2527: *
2528: *                    Generate right bidiagonalizing vectors in WORK(IU)
2529: *                    (Workspace: need 2*M*M+4*M-1,
2530: *                                prefer 2*M*M+3*M+(M-1)*NB)
2531: *
2532:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2533:      $                            WORK( ITAUP ), WORK( IWORK ),
2534:      $                            LWORK-IWORK+1, IERR )
2535: *
2536: *                    Generate left bidiagonalizing vectors in WORK(IR)
2537: *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2538: *
2539:                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2540:      $                            WORK( ITAUQ ), WORK( IWORK ),
2541:      $                            LWORK-IWORK+1, IERR )
2542:                      IWORK = IE + M
2543: *
2544: *                    Perform bidiagonal QR iteration, computing left
2545: *                    singular vectors of L in WORK(IR) and computing
2546: *                    right singular vectors of L in WORK(IU)
2547: *                    (Workspace: need 2*M*M+BDSPAC)
2548: *
2549:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2550:      $                            WORK( IU ), LDWRKU, WORK( IR ),
2551:      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2552: *
2553: *                    Multiply right singular vectors of L in WORK(IU) by
2554: *                    Q in A, storing result in VT
2555: *                    (Workspace: need M*M)
2556: *
2557:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2558:      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2559: *
2560: *                    Copy left singular vectors of L to A
2561: *                    (Workspace: need M*M)
2562: *
2563:                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2564:      $                            LDA )
2565: *
2566:                   ELSE
2567: *
2568: *                    Insufficient workspace for a fast algorithm
2569: *
2570:                      ITAU = 1
2571:                      IWORK = ITAU + M
2572: *
2573: *                    Compute A=L*Q, copying result to VT
2574: *                    (Workspace: need 2*M, prefer M+M*NB)
2575: *
2576:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2577:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2578:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2579: *
2580: *                    Generate Q in VT
2581: *                    (Workspace: need 2*M, prefer M+M*NB)
2582: *
2583:                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2584:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2585:                      IE = ITAU
2586:                      ITAUQ = IE + M
2587:                      ITAUP = ITAUQ + M
2588:                      IWORK = ITAUP + M
2589: *
2590: *                    Zero out above L in A
2591: *
2592:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2593:      $                            LDA )
2594: *
2595: *                    Bidiagonalize L in A
2596: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2597: *
2598:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2599:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2600:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2601: *
2602: *                    Multiply right vectors bidiagonalizing L by Q in VT
2603: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2604: *
2605:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2606:      $                            WORK( ITAUP ), VT, LDVT,
2607:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2608: *
2609: *                    Generate left bidiagonalizing vectors of L in A
2610: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2611: *
2612:                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2613:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2614:                      IWORK = IE + M
2615: *
2616: *                    Perform bidiagonal QR iteration, compute left
2617: *                    singular vectors of A in A and compute right
2618: *                    singular vectors of A in VT
2619: *                    (Workspace: need BDSPAC)
2620: *
2621:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2622:      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2623:      $                            INFO )
2624: *
2625:                   END IF
2626: *
2627:                ELSE IF( WNTUAS ) THEN
2628: *
2629: *                 Path 6t(N much larger than M, JOBU='S' or 'A',
2630: *                         JOBVT='S')
2631: *                 M right singular vectors to be computed in VT and
2632: *                 M left singular vectors to be computed in U
2633: *
2634:                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2635: *
2636: *                    Sufficient workspace for a fast algorithm
2637: *
2638:                      IU = 1
2639:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2640: *
2641: *                       WORK(IU) is LDA by N
2642: *
2643:                         LDWRKU = LDA
2644:                      ELSE
2645: *
2646: *                       WORK(IU) is LDA by M
2647: *
2648:                         LDWRKU = M
2649:                      END IF
2650:                      ITAU = IU + LDWRKU*M
2651:                      IWORK = ITAU + M
2652: *
2653: *                    Compute A=L*Q
2654: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2655: *
2656:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2657:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2658: *
2659: *                    Copy L to WORK(IU), zeroing out above it
2660: *
2661:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2662:      $                            LDWRKU )
2663:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2664:      $                            WORK( IU+LDWRKU ), LDWRKU )
2665: *
2666: *                    Generate Q in A
2667: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2668: *
2669:                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2670:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2671:                      IE = ITAU
2672:                      ITAUQ = IE + M
2673:                      ITAUP = ITAUQ + M
2674:                      IWORK = ITAUP + M
2675: *
2676: *                    Bidiagonalize L in WORK(IU), copying result to U
2677: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2678: *
2679:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2680:      $                            WORK( IE ), WORK( ITAUQ ),
2681:      $                            WORK( ITAUP ), WORK( IWORK ),
2682:      $                            LWORK-IWORK+1, IERR )
2683:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2684:      $                            LDU )
2685: *
2686: *                    Generate right bidiagonalizing vectors in WORK(IU)
2687: *                    (Workspace: need M*M+4*M-1,
2688: *                                prefer M*M+3*M+(M-1)*NB)
2689: *
2690:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2691:      $                            WORK( ITAUP ), WORK( IWORK ),
2692:      $                            LWORK-IWORK+1, IERR )
2693: *
2694: *                    Generate left bidiagonalizing vectors in U
2695: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2696: *
2697:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2698:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2699:                      IWORK = IE + M
2700: *
2701: *                    Perform bidiagonal QR iteration, computing left
2702: *                    singular vectors of L in U and computing right
2703: *                    singular vectors of L in WORK(IU)
2704: *                    (Workspace: need M*M+BDSPAC)
2705: *
2706:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2707:      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2708:      $                            WORK( IWORK ), INFO )
2709: *
2710: *                    Multiply right singular vectors of L in WORK(IU) by
2711: *                    Q in A, storing result in VT
2712: *                    (Workspace: need M*M)
2713: *
2714:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2715:      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2716: *
2717:                   ELSE
2718: *
2719: *                    Insufficient workspace for a fast algorithm
2720: *
2721:                      ITAU = 1
2722:                      IWORK = ITAU + M
2723: *
2724: *                    Compute A=L*Q, copying result to VT
2725: *                    (Workspace: need 2*M, prefer M+M*NB)
2726: *
2727:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2728:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2729:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2730: *
2731: *                    Generate Q in VT
2732: *                    (Workspace: need 2*M, prefer M+M*NB)
2733: *
2734:                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2735:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2736: *
2737: *                    Copy L to U, zeroing out above it
2738: *
2739:                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2740:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2741:      $                            LDU )
2742:                      IE = ITAU
2743:                      ITAUQ = IE + M
2744:                      ITAUP = ITAUQ + M
2745:                      IWORK = ITAUP + M
2746: *
2747: *                    Bidiagonalize L in U
2748: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2749: *
2750:                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2751:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2752:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2753: *
2754: *                    Multiply right bidiagonalizing vectors in U by Q
2755: *                    in VT
2756: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2757: *
2758:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2759:      $                            WORK( ITAUP ), VT, LDVT,
2760:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2761: *
2762: *                    Generate left bidiagonalizing vectors in U
2763: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2764: *
2765:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2766:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2767:                      IWORK = IE + M
2768: *
2769: *                    Perform bidiagonal QR iteration, computing left
2770: *                    singular vectors of A in U and computing right
2771: *                    singular vectors of A in VT
2772: *                    (Workspace: need BDSPAC)
2773: *
2774:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2775:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2776:      $                            INFO )
2777: *
2778:                   END IF
2779: *
2780:                END IF
2781: *
2782:             ELSE IF( WNTVA ) THEN
2783: *
2784:                IF( WNTUN ) THEN
2785: *
2786: *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2787: *                 N right singular vectors to be computed in VT and
2788: *                 no left singular vectors to be computed
2789: *
2790:                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2791: *
2792: *                    Sufficient workspace for a fast algorithm
2793: *
2794:                      IR = 1
2795:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2796: *
2797: *                       WORK(IR) is LDA by M
2798: *
2799:                         LDWRKR = LDA
2800:                      ELSE
2801: *
2802: *                       WORK(IR) is M by M
2803: *
2804:                         LDWRKR = M
2805:                      END IF
2806:                      ITAU = IR + LDWRKR*M
2807:                      IWORK = ITAU + M
2808: *
2809: *                    Compute A=L*Q, copying result to VT
2810: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2811: *
2812:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2813:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2814:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2815: *
2816: *                    Copy L to WORK(IR), zeroing out above it
2817: *
2818:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
2819:      $                            LDWRKR )
2820:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2821:      $                            WORK( IR+LDWRKR ), LDWRKR )
2822: *
2823: *                    Generate Q in VT
2824: *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2825: *
2826:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2827:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2828:                      IE = ITAU
2829:                      ITAUQ = IE + M
2830:                      ITAUP = ITAUQ + M
2831:                      IWORK = ITAUP + M
2832: *
2833: *                    Bidiagonalize L in WORK(IR)
2834: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2835: *
2836:                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
2837:      $                            WORK( IE ), WORK( ITAUQ ),
2838:      $                            WORK( ITAUP ), WORK( IWORK ),
2839:      $                            LWORK-IWORK+1, IERR )
2840: *
2841: *                    Generate right bidiagonalizing vectors in WORK(IR)
2842: *                    (Workspace: need M*M+4*M-1,
2843: *                                prefer M*M+3*M+(M-1)*NB)
2844: *
2845:                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2846:      $                            WORK( ITAUP ), WORK( IWORK ),
2847:      $                            LWORK-IWORK+1, IERR )
2848:                      IWORK = IE + M
2849: *
2850: *                    Perform bidiagonal QR iteration, computing right
2851: *                    singular vectors of L in WORK(IR)
2852: *                    (Workspace: need M*M+BDSPAC)
2853: *
2854:                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2855:      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2856:      $                            WORK( IWORK ), INFO )
2857: *
2858: *                    Multiply right singular vectors of L in WORK(IR) by
2859: *                    Q in VT, storing result in A
2860: *                    (Workspace: need M*M)
2861: *
2862:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2863:      $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2864: *
2865: *                    Copy right singular vectors of A from A to VT
2866: *
2867:                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
2868: *
2869:                   ELSE
2870: *
2871: *                    Insufficient workspace for a fast algorithm
2872: *
2873:                      ITAU = 1
2874:                      IWORK = ITAU + M
2875: *
2876: *                    Compute A=L*Q, copying result to VT
2877: *                    (Workspace: need 2*M, prefer M+M*NB)
2878: *
2879:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2880:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2881:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2882: *
2883: *                    Generate Q in VT
2884: *                    (Workspace: need M+N, prefer M+N*NB)
2885: *
2886:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2887:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2888:                      IE = ITAU
2889:                      ITAUQ = IE + M
2890:                      ITAUP = ITAUQ + M
2891:                      IWORK = ITAUP + M
2892: *
2893: *                    Zero out above L in A
2894: *
2895:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2896:      $                            LDA )
2897: *
2898: *                    Bidiagonalize L in A
2899: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2900: *
2901:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2902:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2903:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2904: *
2905: *                    Multiply right bidiagonalizing vectors in A by Q
2906: *                    in VT
2907: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2908: *
2909:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2910:      $                            WORK( ITAUP ), VT, LDVT,
2911:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2912:                      IWORK = IE + M
2913: *
2914: *                    Perform bidiagonal QR iteration, computing right
2915: *                    singular vectors of A in VT
2916: *                    (Workspace: need BDSPAC)
2917: *
2918:                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2919:      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2920:      $                            INFO )
2921: *
2922:                   END IF
2923: *
2924:                ELSE IF( WNTUO ) THEN
2925: *
2926: *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2927: *                 N right singular vectors to be computed in VT and
2928: *                 M left singular vectors to be overwritten on A
2929: *
2930:                   IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2931: *
2932: *                    Sufficient workspace for a fast algorithm
2933: *
2934:                      IU = 1
2935:                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2936: *
2937: *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2938: *
2939:                         LDWRKU = LDA
2940:                         IR = IU + LDWRKU*M
2941:                         LDWRKR = LDA
2942:                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2943: *
2944: *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2945: *
2946:                         LDWRKU = LDA
2947:                         IR = IU + LDWRKU*M
2948:                         LDWRKR = M
2949:                      ELSE
2950: *
2951: *                       WORK(IU) is M by M and WORK(IR) is M by M
2952: *
2953:                         LDWRKU = M
2954:                         IR = IU + LDWRKU*M
2955:                         LDWRKR = M
2956:                      END IF
2957:                      ITAU = IR + LDWRKR*M
2958:                      IWORK = ITAU + M
2959: *
2960: *                    Compute A=L*Q, copying result to VT
2961: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2962: *
2963:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2964:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2965:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2966: *
2967: *                    Generate Q in VT
2968: *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2969: *
2970:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2971:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2972: *
2973: *                    Copy L to WORK(IU), zeroing out above it
2974: *
2975:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2976:      $                            LDWRKU )
2977:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2978:      $                            WORK( IU+LDWRKU ), LDWRKU )
2979:                      IE = ITAU
2980:                      ITAUQ = IE + M
2981:                      ITAUP = ITAUQ + M
2982:                      IWORK = ITAUP + M
2983: *
2984: *                    Bidiagonalize L in WORK(IU), copying result to
2985: *                    WORK(IR)
2986: *                    (Workspace: need 2*M*M+4*M,
2987: *                                prefer 2*M*M+3*M+2*M*NB)
2988: *
2989:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2990:      $                            WORK( IE ), WORK( ITAUQ ),
2991:      $                            WORK( ITAUP ), WORK( IWORK ),
2992:      $                            LWORK-IWORK+1, IERR )
2993:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2994:      $                            WORK( IR ), LDWRKR )
2995: *
2996: *                    Generate right bidiagonalizing vectors in WORK(IU)
2997: *                    (Workspace: need 2*M*M+4*M-1,
2998: *                                prefer 2*M*M+3*M+(M-1)*NB)
2999: *
3000:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3001:      $                            WORK( ITAUP ), WORK( IWORK ),
3002:      $                            LWORK-IWORK+1, IERR )
3003: *
3004: *                    Generate left bidiagonalizing vectors in WORK(IR)
3005: *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3006: *
3007:                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3008:      $                            WORK( ITAUQ ), WORK( IWORK ),
3009:      $                            LWORK-IWORK+1, IERR )
3010:                      IWORK = IE + M
3011: *
3012: *                    Perform bidiagonal QR iteration, computing left
3013: *                    singular vectors of L in WORK(IR) and computing
3014: *                    right singular vectors of L in WORK(IU)
3015: *                    (Workspace: need 2*M*M+BDSPAC)
3016: *
3017:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3018:      $                            WORK( IU ), LDWRKU, WORK( IR ),
3019:      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3020: *
3021: *                    Multiply right singular vectors of L in WORK(IU) by
3022: *                    Q in VT, storing result in A
3023: *                    (Workspace: need M*M)
3024: *
3025:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3026:      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3027: *
3028: *                    Copy right singular vectors of A from A to VT
3029: *
3030:                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
3031: *
3032: *                    Copy left singular vectors of A from WORK(IR) to A
3033: *
3034:                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3035:      $                            LDA )
3036: *
3037:                   ELSE
3038: *
3039: *                    Insufficient workspace for a fast algorithm
3040: *
3041:                      ITAU = 1
3042:                      IWORK = ITAU + M
3043: *
3044: *                    Compute A=L*Q, copying result to VT
3045: *                    (Workspace: need 2*M, prefer M+M*NB)
3046: *
3047:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3048:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3049:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3050: *
3051: *                    Generate Q in VT
3052: *                    (Workspace: need M+N, prefer M+N*NB)
3053: *
3054:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3055:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3056:                      IE = ITAU
3057:                      ITAUQ = IE + M
3058:                      ITAUP = ITAUQ + M
3059:                      IWORK = ITAUP + M
3060: *
3061: *                    Zero out above L in A
3062: *
3063:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3064:      $                            LDA )
3065: *
3066: *                    Bidiagonalize L in A
3067: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3068: *
3069:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
3070:      $                            WORK( ITAUQ ), WORK( ITAUP ),
3071:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3072: *
3073: *                    Multiply right bidiagonalizing vectors in A by Q
3074: *                    in VT
3075: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3076: *
3077:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3078:      $                            WORK( ITAUP ), VT, LDVT,
3079:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3080: *
3081: *                    Generate left bidiagonalizing vectors in A
3082: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3083: *
3084:                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3085:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3086:                      IWORK = IE + M
3087: *
3088: *                    Perform bidiagonal QR iteration, computing left
3089: *                    singular vectors of A in A and computing right
3090: *                    singular vectors of A in VT
3091: *                    (Workspace: need BDSPAC)
3092: *
3093:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3094:      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3095:      $                            INFO )
3096: *
3097:                   END IF
3098: *
3099:                ELSE IF( WNTUAS ) THEN
3100: *
3101: *                 Path 9t(N much larger than M, JOBU='S' or 'A',
3102: *                         JOBVT='A')
3103: *                 N right singular vectors to be computed in VT and
3104: *                 M left singular vectors to be computed in U
3105: *
3106:                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3107: *
3108: *                    Sufficient workspace for a fast algorithm
3109: *
3110:                      IU = 1
3111:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
3112: *
3113: *                       WORK(IU) is LDA by M
3114: *
3115:                         LDWRKU = LDA
3116:                      ELSE
3117: *
3118: *                       WORK(IU) is M by M
3119: *
3120:                         LDWRKU = M
3121:                      END IF
3122:                      ITAU = IU + LDWRKU*M
3123:                      IWORK = ITAU + M
3124: *
3125: *                    Compute A=L*Q, copying result to VT
3126: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3127: *
3128:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3129:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3130:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3131: *
3132: *                    Generate Q in VT
3133: *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3134: *
3135:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3136:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3137: *
3138: *                    Copy L to WORK(IU), zeroing out above it
3139: *
3140:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
3141:      $                            LDWRKU )
3142:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
3143:      $                            WORK( IU+LDWRKU ), LDWRKU )
3144:                      IE = ITAU
3145:                      ITAUQ = IE + M
3146:                      ITAUP = ITAUQ + M
3147:                      IWORK = ITAUP + M
3148: *
3149: *                    Bidiagonalize L in WORK(IU), copying result to U
3150: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3151: *
3152:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
3153:      $                            WORK( IE ), WORK( ITAUQ ),
3154:      $                            WORK( ITAUP ), WORK( IWORK ),
3155:      $                            LWORK-IWORK+1, IERR )
3156:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3157:      $                            LDU )
3158: *
3159: *                    Generate right bidiagonalizing vectors in WORK(IU)
3160: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3161: *
3162:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3163:      $                            WORK( ITAUP ), WORK( IWORK ),
3164:      $                            LWORK-IWORK+1, IERR )
3165: *
3166: *                    Generate left bidiagonalizing vectors in U
3167: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3168: *
3169:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3170:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3171:                      IWORK = IE + M
3172: *
3173: *                    Perform bidiagonal QR iteration, computing left
3174: *                    singular vectors of L in U and computing right
3175: *                    singular vectors of L in WORK(IU)
3176: *                    (Workspace: need M*M+BDSPAC)
3177: *
3178:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3179:      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3180:      $                            WORK( IWORK ), INFO )
3181: *
3182: *                    Multiply right singular vectors of L in WORK(IU) by
3183: *                    Q in VT, storing result in A
3184: *                    (Workspace: need M*M)
3185: *
3186:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3187:      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3188: *
3189: *                    Copy right singular vectors of A from A to VT
3190: *
3191:                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
3192: *
3193:                   ELSE
3194: *
3195: *                    Insufficient workspace for a fast algorithm
3196: *
3197:                      ITAU = 1
3198:                      IWORK = ITAU + M
3199: *
3200: *                    Compute A=L*Q, copying result to VT
3201: *                    (Workspace: need 2*M, prefer M+M*NB)
3202: *
3203:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3204:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3205:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3206: *
3207: *                    Generate Q in VT
3208: *                    (Workspace: need M+N, prefer M+N*NB)
3209: *
3210:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3211:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3212: *
3213: *                    Copy L to U, zeroing out above it
3214: *
3215:                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
3216:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3217:      $                            LDU )
3218:                      IE = ITAU
3219:                      ITAUQ = IE + M
3220:                      ITAUP = ITAUQ + M
3221:                      IWORK = ITAUP + M
3222: *
3223: *                    Bidiagonalize L in U
3224: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3225: *
3226:                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
3227:      $                            WORK( ITAUQ ), WORK( ITAUP ),
3228:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3229: *
3230: *                    Multiply right bidiagonalizing vectors in U by Q
3231: *                    in VT
3232: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3233: *
3234:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3235:      $                            WORK( ITAUP ), VT, LDVT,
3236:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3237: *
3238: *                    Generate left bidiagonalizing vectors in U
3239: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3240: *
3241:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3242:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3243:                      IWORK = IE + M
3244: *
3245: *                    Perform bidiagonal QR iteration, computing left
3246: *                    singular vectors of A in U and computing right
3247: *                    singular vectors of A in VT
3248: *                    (Workspace: need BDSPAC)
3249: *
3250:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3251:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3252:      $                            INFO )
3253: *
3254:                   END IF
3255: *
3256:                END IF
3257: *
3258:             END IF
3259: *
3260:          ELSE
3261: *
3262: *           N .LT. MNTHR
3263: *
3264: *           Path 10t(N greater than M, but not much larger)
3265: *           Reduce to bidiagonal form without LQ decomposition
3266: *
3267:             IE = 1
3268:             ITAUQ = IE + M
3269:             ITAUP = ITAUQ + M
3270:             IWORK = ITAUP + M
3271: *
3272: *           Bidiagonalize A
3273: *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3274: *
3275:             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3276:      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3277:      $                   IERR )
3278:             IF( WNTUAS ) THEN
3279: *
3280: *              If left singular vectors desired in U, copy result to U
3281: *              and generate left bidiagonalizing vectors in U
3282: *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3283: *
3284:                CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
3285:                CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3286:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3287:             END IF
3288:             IF( WNTVAS ) THEN
3289: *
3290: *              If right singular vectors desired in VT, copy result to
3291: *              VT and generate right bidiagonalizing vectors in VT
3292: *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3293: *
3294:                CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3295:                IF( WNTVA )
3296:      $            NRVT = N
3297:                IF( WNTVS )
3298:      $            NRVT = M
3299:                CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3300:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3301:             END IF
3302:             IF( WNTUO ) THEN
3303: *
3304: *              If left singular vectors desired in A, generate left
3305: *              bidiagonalizing vectors in A
3306: *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3307: *
3308:                CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3309:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3310:             END IF
3311:             IF( WNTVO ) THEN
3312: *
3313: *              If right singular vectors desired in A, generate right
3314: *              bidiagonalizing vectors in A
3315: *              (Workspace: need 4*M, prefer 3*M+M*NB)
3316: *
3317:                CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3318:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3319:             END IF
3320:             IWORK = IE + M
3321:             IF( WNTUAS .OR. WNTUO )
3322:      $         NRU = M
3323:             IF( WNTUN )
3324:      $         NRU = 0
3325:             IF( WNTVAS .OR. WNTVO )
3326:      $         NCVT = N
3327:             IF( WNTVN )
3328:      $         NCVT = 0
3329:             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3330: *
3331: *              Perform bidiagonal QR iteration, if desired, computing
3332: *              left singular vectors in U and computing right singular
3333: *              vectors in VT
3334: *              (Workspace: need BDSPAC)
3335: *
3336:                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3337:      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3338:             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3339: *
3340: *              Perform bidiagonal QR iteration, if desired, computing
3341: *              left singular vectors in U and computing right singular
3342: *              vectors in A
3343: *              (Workspace: need BDSPAC)
3344: *
3345:                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3346:      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3347:             ELSE
3348: *
3349: *              Perform bidiagonal QR iteration, if desired, computing
3350: *              left singular vectors in A and computing right singular
3351: *              vectors in VT
3352: *              (Workspace: need BDSPAC)
3353: *
3354:                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3355:      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3356:             END IF
3357: *
3358:          END IF
3359: *
3360:       END IF
3361: *
3362: *     If SBDSQR failed to converge, copy unconverged superdiagonals
3363: *     to WORK( 2:MINMN )
3364: *
3365:       IF( INFO.NE.0 ) THEN
3366:          IF( IE.GT.2 ) THEN
3367:             DO 50 I = 1, MINMN - 1
3368:                WORK( I+1 ) = WORK( I+IE-1 )
3369:    50       CONTINUE
3370:          END IF
3371:          IF( IE.LT.2 ) THEN
3372:             DO 60 I = MINMN - 1, 1, -1
3373:                WORK( I+1 ) = WORK( I+IE-1 )
3374:    60       CONTINUE
3375:          END IF
3376:       END IF
3377: *
3378: *     Undo scaling if necessary
3379: *
3380:       IF( ISCL.EQ.1 ) THEN
3381:          IF( ANRM.GT.BIGNUM )
3382:      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3383:      $                   IERR )
3384:          IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3385:      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3386:      $                   MINMN, IERR )
3387:          IF( ANRM.LT.SMLNUM )
3388:      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3389:      $                   IERR )
3390:          IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3391:      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3392:      $                   MINMN, IERR )
3393:       END IF
3394: *
3395: *     Return optimal workspace in WORK(1)
3396: *
3397:       WORK( 1 ) = MAXWRK
3398: *
3399:       RETURN
3400: *
3401: *     End of SGESVD
3402: *
3403:       END
3404: