0001:       SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
0002:      $                   WORK, LWORK, INFO )
0003: *
0004: *  -- LAPACK driver routine (version 3.2) --
0005: *     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
0006: *     November 2006
0007: *
0008: *     .. Scalar Arguments ..
0009:       CHARACTER          JOBU, JOBVT
0010:       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
0011: *     ..
0012: *     .. Array Arguments ..
0013:       REAL               A( LDA, * ), S( * ), U( LDU, * ),
0014:      $                   VT( LDVT, * ), WORK( * )
0015: *     ..
0016: *
0017: *  Purpose
0018: *  =======
0019: *
0020: *  SGESVD computes the singular value decomposition (SVD) of a real
0021: *  M-by-N matrix A, optionally computing the left and/or right singular
0022: *  vectors. The SVD is written
0023: *
0024: *       A = U * SIGMA * transpose(V)
0025: *
0026: *  where SIGMA is an M-by-N matrix which is zero except for its
0027: *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
0028: *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
0029: *  are the singular values of A; they are real and non-negative, and
0030: *  are returned in descending order.  The first min(m,n) columns of
0031: *  U and V are the left and right singular vectors of A.
0032: *
0033: *  Note that the routine returns V**T, not V.
0034: *
0035: *  Arguments
0036: *  =========
0037: *
0038: *  JOBU    (input) CHARACTER*1
0039: *          Specifies options for computing all or part of the matrix U:
0040: *          = 'A':  all M columns of U are returned in array U:
0041: *          = 'S':  the first min(m,n) columns of U (the left singular
0042: *                  vectors) are returned in the array U;
0043: *          = 'O':  the first min(m,n) columns of U (the left singular
0044: *                  vectors) are overwritten on the array A;
0045: *          = 'N':  no columns of U (no left singular vectors) are
0046: *                  computed.
0047: *
0048: *  JOBVT   (input) CHARACTER*1
0049: *          Specifies options for computing all or part of the matrix
0050: *          V**T:
0051: *          = 'A':  all N rows of V**T are returned in the array VT;
0052: *          = 'S':  the first min(m,n) rows of V**T (the right singular
0053: *                  vectors) are returned in the array VT;
0054: *          = 'O':  the first min(m,n) rows of V**T (the right singular
0055: *                  vectors) are overwritten on the array A;
0056: *          = 'N':  no rows of V**T (no right singular vectors) are
0057: *                  computed.
0058: *
0059: *          JOBVT and JOBU cannot both be 'O'.
0060: *
0061: *  M       (input) INTEGER
0062: *          The number of rows of the input matrix A.  M >= 0.
0063: *
0064: *  N       (input) INTEGER
0065: *          The number of columns of the input matrix A.  N >= 0.
0066: *
0067: *  A       (input/output) REAL array, dimension (LDA,N)
0068: *          On entry, the M-by-N matrix A.
0069: *          On exit,
0070: *          if JOBU = 'O',  A is overwritten with the first min(m,n)
0071: *                          columns of U (the left singular vectors,
0072: *                          stored columnwise);
0073: *          if JOBVT = 'O', A is overwritten with the first min(m,n)
0074: *                          rows of V**T (the right singular vectors,
0075: *                          stored rowwise);
0076: *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
0077: *                          are destroyed.
0078: *
0079: *  LDA     (input) INTEGER
0080: *          The leading dimension of the array A.  LDA >= max(1,M).
0081: *
0082: *  S       (output) REAL array, dimension (min(M,N))
0083: *          The singular values of A, sorted so that S(i) >= S(i+1).
0084: *
0085: *  U       (output) REAL array, dimension (LDU,UCOL)
0086: *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
0087: *          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
0088: *          if JOBU = 'S', U contains the first min(m,n) columns of U
0089: *          (the left singular vectors, stored columnwise);
0090: *          if JOBU = 'N' or 'O', U is not referenced.
0091: *
0092: *  LDU     (input) INTEGER
0093: *          The leading dimension of the array U.  LDU >= 1; if
0094: *          JOBU = 'S' or 'A', LDU >= M.
0095: *
0096: *  VT      (output) REAL array, dimension (LDVT,N)
0097: *          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
0098: *          V**T;
0099: *          if JOBVT = 'S', VT contains the first min(m,n) rows of
0100: *          V**T (the right singular vectors, stored rowwise);
0101: *          if JOBVT = 'N' or 'O', VT is not referenced.
0102: *
0103: *  LDVT    (input) INTEGER
0104: *          The leading dimension of the array VT.  LDVT >= 1; if
0105: *          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
0106: *
0107: *  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK))
0108: *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
0109: *          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
0110: *          superdiagonal elements of an upper bidiagonal matrix B
0111: *          whose diagonal is in S (not necessarily sorted). B
0112: *          satisfies A = U * B * VT, so it has the same singular values
0113: *          as A, and singular vectors related by U and VT.
0114: *
0115: *  LWORK   (input) INTEGER
0116: *          The dimension of the array WORK.
0117: *          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
0118: *          For good performance, LWORK should generally be larger.
0119: *
0120: *          If LWORK = -1, then a workspace query is assumed; the routine
0121: *          only calculates the optimal size of the WORK array, returns
0122: *          this value as the first entry of the WORK array, and no error
0123: *          message related to LWORK is issued by XERBLA.
0124: *
0125: *  INFO    (output) INTEGER
0126: *          = 0:  successful exit.
0127: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
0128: *          > 0:  if SBDSQR did not converge, INFO specifies how many
0129: *                superdiagonals of an intermediate bidiagonal form B
0130: *                did not converge to zero. See the description of WORK
0131: *                above for details.
0132: *
0133: *  =====================================================================
0134: *
0135: *     .. Parameters ..
0136:       REAL               ZERO, ONE
0137:       PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
0138: *     ..
0139: *     .. Local Scalars ..
0140:       LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
0141:      $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
0142:       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
0143:      $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
0144:      $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
0145:      $                   NRVT, WRKBL
0146:       REAL               ANRM, BIGNUM, EPS, SMLNUM
0147: *     ..
0148: *     .. Local Arrays ..
0149:       REAL               DUM( 1 )
0150: *     ..
0151: *     .. External Subroutines ..
0152:       EXTERNAL           SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
0153:      $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
0154:      $                   XERBLA
0155: *     ..
0156: *     .. External Functions ..
0157:       LOGICAL            LSAME
0158:       INTEGER            ILAENV
0159:       REAL               SLAMCH, SLANGE
0160:       EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
0161: *     ..
0162: *     .. Intrinsic Functions ..
0163:       INTRINSIC          MAX, MIN, SQRT
0164: *     ..
0165: *     .. Executable Statements ..
0166: *
0167: *     Test the input arguments
0168: *
0169:       INFO = 0
0170:       MINMN = MIN( M, N )
0171:       WNTUA = LSAME( JOBU, 'A' )
0172:       WNTUS = LSAME( JOBU, 'S' )
0173:       WNTUAS = WNTUA .OR. WNTUS
0174:       WNTUO = LSAME( JOBU, 'O' )
0175:       WNTUN = LSAME( JOBU, 'N' )
0176:       WNTVA = LSAME( JOBVT, 'A' )
0177:       WNTVS = LSAME( JOBVT, 'S' )
0178:       WNTVAS = WNTVA .OR. WNTVS
0179:       WNTVO = LSAME( JOBVT, 'O' )
0180:       WNTVN = LSAME( JOBVT, 'N' )
0181:       LQUERY = ( LWORK.EQ.-1 )
0182: *
0183:       IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
0184:          INFO = -1
0185:       ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
0186:      $         ( WNTVO .AND. WNTUO ) ) THEN
0187:          INFO = -2
0188:       ELSE IF( M.LT.0 ) THEN
0189:          INFO = -3
0190:       ELSE IF( N.LT.0 ) THEN
0191:          INFO = -4
0192:       ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
0193:          INFO = -6
0194:       ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
0195:          INFO = -9
0196:       ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
0197:      $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
0198:          INFO = -11
0199:       END IF
0200: *
0201: *     Compute workspace
0202: *      (Note: Comments in the code beginning "Workspace:" describe the
0203: *       minimal amount of workspace needed at that point in the code,
0204: *       as well as the preferred amount for good performance.
0205: *       NB refers to the optimal block size for the immediately
0206: *       following subroutine, as returned by ILAENV.)
0207: *
0208:       IF( INFO.EQ.0 ) THEN
0209:          MINWRK = 1
0210:          MAXWRK = 1
0211:          IF( M.GE.N .AND. MINMN.GT.0 ) THEN
0212: *
0213: *           Compute space needed for SBDSQR
0214: *
0215:             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
0216:             BDSPAC = 5*N
0217:             IF( M.GE.MNTHR ) THEN
0218:                IF( WNTUN ) THEN
0219: *
0220: *                 Path 1 (M much larger than N, JOBU='N')
0221: *
0222:                   MAXWRK = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1,
0223:      $                     -1 )
0224:                   MAXWRK = MAX( MAXWRK, 3*N+2*N*
0225:      $                     ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0226:                   IF( WNTVO .OR. WNTVAS )
0227:      $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
0228:      $                        ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0229:                   MAXWRK = MAX( MAXWRK, BDSPAC )
0230:                   MINWRK = MAX( 4*N, BDSPAC )
0231:                ELSE IF( WNTUO .AND. WNTVN ) THEN
0232: *
0233: *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
0234: *
0235:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0236:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0237:      $                    N, N, -1 ) )
0238:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0239:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0240:                   WRKBL = MAX( WRKBL, 3*N+N*
0241:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0242:                   WRKBL = MAX( WRKBL, BDSPAC )
0243:                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
0244:                   MINWRK = MAX( 3*N+M, BDSPAC )
0245:                ELSE IF( WNTUO .AND. WNTVAS ) THEN
0246: *
0247: *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
0248: *                 'A')
0249: *
0250:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0251:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0252:      $                    N, N, -1 ) )
0253:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0254:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0255:                   WRKBL = MAX( WRKBL, 3*N+N*
0256:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0257:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0258:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0259:                   WRKBL = MAX( WRKBL, BDSPAC )
0260:                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
0261:                   MINWRK = MAX( 3*N+M, BDSPAC )
0262:                ELSE IF( WNTUS .AND. WNTVN ) THEN
0263: *
0264: *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
0265: *
0266:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0267:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0268:      $                    N, N, -1 ) )
0269:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0270:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0271:                   WRKBL = MAX( WRKBL, 3*N+N*
0272:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0273:                   WRKBL = MAX( WRKBL, BDSPAC )
0274:                   MAXWRK = N*N + WRKBL
0275:                   MINWRK = MAX( 3*N+M, BDSPAC )
0276:                ELSE IF( WNTUS .AND. WNTVO ) THEN
0277: *
0278: *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
0279: *
0280:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0281:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0282:      $                    N, N, -1 ) )
0283:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0284:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0285:                   WRKBL = MAX( WRKBL, 3*N+N*
0286:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0287:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0288:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0289:                   WRKBL = MAX( WRKBL, BDSPAC )
0290:                   MAXWRK = 2*N*N + WRKBL
0291:                   MINWRK = MAX( 3*N+M, BDSPAC )
0292:                ELSE IF( WNTUS .AND. WNTVAS ) THEN
0293: *
0294: *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
0295: *                 'A')
0296: *
0297:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0298:                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
0299:      $                    N, N, -1 ) )
0300:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0301:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0302:                   WRKBL = MAX( WRKBL, 3*N+N*
0303:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0304:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0305:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0306:                   WRKBL = MAX( WRKBL, BDSPAC )
0307:                   MAXWRK = N*N + WRKBL
0308:                   MINWRK = MAX( 3*N+M, BDSPAC )
0309:                ELSE IF( WNTUA .AND. WNTVN ) THEN
0310: *
0311: *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
0312: *
0313:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0314:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
0315:      $                    M, N, -1 ) )
0316:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0317:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0318:                   WRKBL = MAX( WRKBL, 3*N+N*
0319:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0320:                   WRKBL = MAX( WRKBL, BDSPAC )
0321:                   MAXWRK = N*N + WRKBL
0322:                   MINWRK = MAX( 3*N+M, BDSPAC )
0323:                ELSE IF( WNTUA .AND. WNTVO ) THEN
0324: *
0325: *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
0326: *
0327:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0328:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
0329:      $                    M, N, -1 ) )
0330:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0331:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0332:                   WRKBL = MAX( WRKBL, 3*N+N*
0333:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0334:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0335:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0336:                   WRKBL = MAX( WRKBL, BDSPAC )
0337:                   MAXWRK = 2*N*N + WRKBL
0338:                   MINWRK = MAX( 3*N+M, BDSPAC )
0339:                ELSE IF( WNTUA .AND. WNTVAS ) THEN
0340: *
0341: *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
0342: *                 'A')
0343: *
0344:                   WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
0345:                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
0346:      $                    M, N, -1 ) )
0347:                   WRKBL = MAX( WRKBL, 3*N+2*N*
0348:      $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
0349:                   WRKBL = MAX( WRKBL, 3*N+N*
0350:      $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
0351:                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
0352:      $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0353:                   WRKBL = MAX( WRKBL, BDSPAC )
0354:                   MAXWRK = N*N + WRKBL
0355:                   MINWRK = MAX( 3*N+M, BDSPAC )
0356:                END IF
0357:             ELSE
0358: *
0359: *              Path 10 (M at least N, but not much larger)
0360: *
0361:                MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
0362:      $                  -1, -1 )
0363:                IF( WNTUS .OR. WNTUO )
0364:      $            MAXWRK = MAX( MAXWRK, 3*N+N*
0365:      $                     ILAENV( 1, 'SORGBR', 'Q', M, N, N, -1 ) )
0366:                IF( WNTUA )
0367:      $            MAXWRK = MAX( MAXWRK, 3*N+M*
0368:      $                     ILAENV( 1, 'SORGBR', 'Q', M, M, N, -1 ) )
0369:                IF( .NOT.WNTVN )
0370:      $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
0371:      $                     ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
0372:                MAXWRK = MAX( MAXWRK, BDSPAC )
0373:                MINWRK = MAX( 3*N+M, BDSPAC )
0374:             END IF
0375:          ELSE IF( MINMN.GT.0 ) THEN
0376: *
0377: *           Compute space needed for SBDSQR
0378: *
0379:             MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
0380:             BDSPAC = 5*M
0381:             IF( N.GE.MNTHR ) THEN
0382:                IF( WNTVN ) THEN
0383: *
0384: *                 Path 1t(N much larger than M, JOBVT='N')
0385: *
0386:                   MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1,
0387:      $                     -1 )
0388:                   MAXWRK = MAX( MAXWRK, 3*M+2*M*
0389:      $                     ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0390:                   IF( WNTUO .OR. WNTUAS )
0391:      $               MAXWRK = MAX( MAXWRK, 3*M+M*
0392:      $                        ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0393:                   MAXWRK = MAX( MAXWRK, BDSPAC )
0394:                   MINWRK = MAX( 4*M, BDSPAC )
0395:                ELSE IF( WNTVO .AND. WNTUN ) THEN
0396: *
0397: *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
0398: *
0399:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0400:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0401:      $                    N, M, -1 ) )
0402:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0403:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0404:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0405:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0406:                   WRKBL = MAX( WRKBL, BDSPAC )
0407:                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
0408:                   MINWRK = MAX( 3*M+N, BDSPAC )
0409:                ELSE IF( WNTVO .AND. WNTUAS ) THEN
0410: *
0411: *                 Path 3t(N much larger than M, JOBU='S' or 'A',
0412: *                 JOBVT='O')
0413: *
0414:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0415:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0416:      $                    N, M, -1 ) )
0417:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0418:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0419:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0420:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0421:                   WRKBL = MAX( WRKBL, 3*M+M*
0422:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0423:                   WRKBL = MAX( WRKBL, BDSPAC )
0424:                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
0425:                   MINWRK = MAX( 3*M+N, BDSPAC )
0426:                ELSE IF( WNTVS .AND. WNTUN ) THEN
0427: *
0428: *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
0429: *
0430:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0431:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0432:      $                    N, M, -1 ) )
0433:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0434:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0435:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0436:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0437:                   WRKBL = MAX( WRKBL, BDSPAC )
0438:                   MAXWRK = M*M + WRKBL
0439:                   MINWRK = MAX( 3*M+N, BDSPAC )
0440:                ELSE IF( WNTVS .AND. WNTUO ) THEN
0441: *
0442: *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
0443: *
0444:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0445:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0446:      $                    N, M, -1 ) )
0447:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0448:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0449:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0450:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0451:                   WRKBL = MAX( WRKBL, 3*M+M*
0452:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0453:                   WRKBL = MAX( WRKBL, BDSPAC )
0454:                   MAXWRK = 2*M*M + WRKBL
0455:                   MINWRK = MAX( 3*M+N, BDSPAC )
0456:                   MAXWRK = MAX( MAXWRK, MINWRK )
0457:                ELSE IF( WNTVS .AND. WNTUAS ) THEN
0458: *
0459: *                 Path 6t(N much larger than M, JOBU='S' or 'A',
0460: *                 JOBVT='S')
0461: *
0462:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0463:                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
0464:      $                    N, M, -1 ) )
0465:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0466:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0467:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0468:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0469:                   WRKBL = MAX( WRKBL, 3*M+M*
0470:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0471:                   WRKBL = MAX( WRKBL, BDSPAC )
0472:                   MAXWRK = M*M + WRKBL
0473:                   MINWRK = MAX( 3*M+N, BDSPAC )
0474:                ELSE IF( WNTVA .AND. WNTUN ) THEN
0475: *
0476: *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
0477: *
0478:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0479:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
0480:      $                    N, M, -1 ) )
0481:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0482:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0483:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0484:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0485:                   WRKBL = MAX( WRKBL, BDSPAC )
0486:                   MAXWRK = M*M + WRKBL
0487:                   MINWRK = MAX( 3*M+N, BDSPAC )
0488:                ELSE IF( WNTVA .AND. WNTUO ) THEN
0489: *
0490: *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
0491: *
0492:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0493:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
0494:      $                    N, M, -1 ) )
0495:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0496:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0497:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0498:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0499:                   WRKBL = MAX( WRKBL, 3*M+M*
0500:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0501:                   WRKBL = MAX( WRKBL, BDSPAC )
0502:                   MAXWRK = 2*M*M + WRKBL
0503:                   MINWRK = MAX( 3*M+N, BDSPAC )
0504:                ELSE IF( WNTVA .AND. WNTUAS ) THEN
0505: *
0506: *                 Path 9t(N much larger than M, JOBU='S' or 'A',
0507: *                 JOBVT='A')
0508: *
0509:                   WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
0510:                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
0511:      $                    N, M, -1 ) )
0512:                   WRKBL = MAX( WRKBL, 3*M+2*M*
0513:      $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
0514:                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
0515:      $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
0516:                   WRKBL = MAX( WRKBL, 3*M+M*
0517:      $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0518:                   WRKBL = MAX( WRKBL, BDSPAC )
0519:                   MAXWRK = M*M + WRKBL
0520:                   MINWRK = MAX( 3*M+N, BDSPAC )
0521:                END IF
0522:             ELSE
0523: *
0524: *              Path 10t(N greater than M, but not much larger)
0525: *
0526:                MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
0527:      $                  -1, -1 )
0528:                IF( WNTVS .OR. WNTVO )
0529:      $            MAXWRK = MAX( MAXWRK, 3*M+M*
0530:      $                     ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) )
0531:                IF( WNTVA )
0532:      $            MAXWRK = MAX( MAXWRK, 3*M+N*
0533:      $                     ILAENV( 1, 'SORGBR', 'P', N, N, M, -1 ) )
0534:                IF( .NOT.WNTUN )
0535:      $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
0536:      $                     ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
0537:                MAXWRK = MAX( MAXWRK, BDSPAC )
0538:                MINWRK = MAX( 3*M+N, BDSPAC )
0539:             END IF
0540:          END IF
0541:          MAXWRK = MAX( MAXWRK, MINWRK )
0542:          WORK( 1 ) = MAXWRK
0543: *
0544:          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
0545:             INFO = -13
0546:          END IF
0547:       END IF
0548: *
0549:       IF( INFO.NE.0 ) THEN
0550:          CALL XERBLA( 'SGESVD', -INFO )
0551:          RETURN
0552:       ELSE IF( LQUERY ) THEN
0553:          RETURN
0554:       END IF
0555: *
0556: *     Quick return if possible
0557: *
0558:       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
0559:          RETURN
0560:       END IF
0561: *
0562: *     Get machine constants
0563: *
0564:       EPS = SLAMCH( 'P' )
0565:       SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
0566:       BIGNUM = ONE / SMLNUM
0567: *
0568: *     Scale A if max element outside range [SMLNUM,BIGNUM]
0569: *
0570:       ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
0571:       ISCL = 0
0572:       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
0573:          ISCL = 1
0574:          CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
0575:       ELSE IF( ANRM.GT.BIGNUM ) THEN
0576:          ISCL = 1
0577:          CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
0578:       END IF
0579: *
0580:       IF( M.GE.N ) THEN
0581: *
0582: *        A has at least as many rows as columns. If A has sufficiently
0583: *        more rows than columns, first reduce using the QR
0584: *        decomposition (if sufficient workspace available)
0585: *
0586:          IF( M.GE.MNTHR ) THEN
0587: *
0588:             IF( WNTUN ) THEN
0589: *
0590: *              Path 1 (M much larger than N, JOBU='N')
0591: *              No left singular vectors to be computed
0592: *
0593:                ITAU = 1
0594:                IWORK = ITAU + N
0595: *
0596: *              Compute A=Q*R
0597: *              (Workspace: need 2*N, prefer N+N*NB)
0598: *
0599:                CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
0600:      $                      LWORK-IWORK+1, IERR )
0601: *
0602: *              Zero out below R
0603: *
0604:                CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
0605:                IE = 1
0606:                ITAUQ = IE + N
0607:                ITAUP = ITAUQ + N
0608:                IWORK = ITAUP + N
0609: *
0610: *              Bidiagonalize R in A
0611: *              (Workspace: need 4*N, prefer 3*N+2*N*NB)
0612: *
0613:                CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
0614:      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
0615:      $                      IERR )
0616:                NCVT = 0
0617:                IF( WNTVO .OR. WNTVAS ) THEN
0618: *
0619: *                 If right singular vectors desired, generate P'.
0620: *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
0621: *
0622:                   CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
0623:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0624:                   NCVT = N
0625:                END IF
0626:                IWORK = IE + N
0627: *
0628: *              Perform bidiagonal QR iteration, computing right
0629: *              singular vectors of A in A if desired
0630: *              (Workspace: need BDSPAC)
0631: *
0632:                CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
0633:      $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
0634: *
0635: *              If right singular vectors desired in VT, copy them there
0636: *
0637:                IF( WNTVAS )
0638:      $            CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT )
0639: *
0640:             ELSE IF( WNTUO .AND. WNTVN ) THEN
0641: *
0642: *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
0643: *              N left singular vectors to be overwritten on A and
0644: *              no right singular vectors to be computed
0645: *
0646:                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0647: *
0648: *                 Sufficient workspace for a fast algorithm
0649: *
0650:                   IR = 1
0651:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
0652: *
0653: *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
0654: *
0655:                      LDWRKU = LDA
0656:                      LDWRKR = LDA
0657:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
0658: *
0659: *                    WORK(IU) is LDA by N, WORK(IR) is N by N
0660: *
0661:                      LDWRKU = LDA
0662:                      LDWRKR = N
0663:                   ELSE
0664: *
0665: *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
0666: *
0667:                      LDWRKU = ( LWORK-N*N-N ) / N
0668:                      LDWRKR = N
0669:                   END IF
0670:                   ITAU = IR + LDWRKR*N
0671:                   IWORK = ITAU + N
0672: *
0673: *                 Compute A=Q*R
0674: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0675: *
0676:                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0677:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0678: *
0679: *                 Copy R to WORK(IR) and zero out below it
0680: *
0681:                   CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
0682:                   CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
0683:      $                         LDWRKR )
0684: *
0685: *                 Generate Q in A
0686: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0687: *
0688:                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0689:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0690:                   IE = ITAU
0691:                   ITAUQ = IE + N
0692:                   ITAUP = ITAUQ + N
0693:                   IWORK = ITAUP + N
0694: *
0695: *                 Bidiagonalize R in WORK(IR)
0696: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0697: *
0698:                   CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
0699:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0700:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0701: *
0702: *                 Generate left vectors bidiagonalizing R
0703: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0704: *
0705:                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0706:      $                         WORK( ITAUQ ), WORK( IWORK ),
0707:      $                         LWORK-IWORK+1, IERR )
0708:                   IWORK = IE + N
0709: *
0710: *                 Perform bidiagonal QR iteration, computing left
0711: *                 singular vectors of R in WORK(IR)
0712: *                 (Workspace: need N*N+BDSPAC)
0713: *
0714:                   CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
0715:      $                         WORK( IR ), LDWRKR, DUM, 1,
0716:      $                         WORK( IWORK ), INFO )
0717:                   IU = IE + N
0718: *
0719: *                 Multiply Q in A by left singular vectors of R in
0720: *                 WORK(IR), storing result in WORK(IU) and copying to A
0721: *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
0722: *
0723:                   DO 10 I = 1, M, LDWRKU
0724:                      CHUNK = MIN( M-I+1, LDWRKU )
0725:                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
0726:      $                           LDA, WORK( IR ), LDWRKR, ZERO,
0727:      $                           WORK( IU ), LDWRKU )
0728:                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
0729:      $                            A( I, 1 ), LDA )
0730:    10             CONTINUE
0731: *
0732:                ELSE
0733: *
0734: *                 Insufficient workspace for a fast algorithm
0735: *
0736:                   IE = 1
0737:                   ITAUQ = IE + N
0738:                   ITAUP = ITAUQ + N
0739:                   IWORK = ITAUP + N
0740: *
0741: *                 Bidiagonalize A
0742: *                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
0743: *
0744:                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
0745:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0746:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0747: *
0748: *                 Generate left vectors bidiagonalizing A
0749: *                 (Workspace: need 4*N, prefer 3*N+N*NB)
0750: *
0751:                   CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
0752:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0753:                   IWORK = IE + N
0754: *
0755: *                 Perform bidiagonal QR iteration, computing left
0756: *                 singular vectors of A in A
0757: *                 (Workspace: need BDSPAC)
0758: *
0759:                   CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
0760:      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
0761: *
0762:                END IF
0763: *
0764:             ELSE IF( WNTUO .AND. WNTVAS ) THEN
0765: *
0766: *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
0767: *              N left singular vectors to be overwritten on A and
0768: *              N right singular vectors to be computed in VT
0769: *
0770:                IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0771: *
0772: *                 Sufficient workspace for a fast algorithm
0773: *
0774:                   IR = 1
0775:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
0776: *
0777: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
0778: *
0779:                      LDWRKU = LDA
0780:                      LDWRKR = LDA
0781:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
0782: *
0783: *                    WORK(IU) is LDA by N and WORK(IR) is N by N
0784: *
0785:                      LDWRKU = LDA
0786:                      LDWRKR = N
0787:                   ELSE
0788: *
0789: *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
0790: *
0791:                      LDWRKU = ( LWORK-N*N-N ) / N
0792:                      LDWRKR = N
0793:                   END IF
0794:                   ITAU = IR + LDWRKR*N
0795:                   IWORK = ITAU + N
0796: *
0797: *                 Compute A=Q*R
0798: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0799: *
0800:                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0801:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0802: *
0803: *                 Copy R to VT, zeroing out below it
0804: *
0805:                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
0806:                   IF( N.GT.1 )
0807:      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
0808:      $                            VT( 2, 1 ), LDVT )
0809: *
0810: *                 Generate Q in A
0811: *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0812: *
0813:                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0814:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0815:                   IE = ITAU
0816:                   ITAUQ = IE + N
0817:                   ITAUP = ITAUQ + N
0818:                   IWORK = ITAUP + N
0819: *
0820: *                 Bidiagonalize R in VT, copying result to WORK(IR)
0821: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0822: *
0823:                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
0824:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0825:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0826:                   CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
0827: *
0828: *                 Generate left vectors bidiagonalizing R in WORK(IR)
0829: *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0830: *
0831:                   CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0832:      $                         WORK( ITAUQ ), WORK( IWORK ),
0833:      $                         LWORK-IWORK+1, IERR )
0834: *
0835: *                 Generate right vectors bidiagonalizing R in VT
0836: *                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
0837: *
0838:                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
0839:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0840:                   IWORK = IE + N
0841: *
0842: *                 Perform bidiagonal QR iteration, computing left
0843: *                 singular vectors of R in WORK(IR) and computing right
0844: *                 singular vectors of R in VT
0845: *                 (Workspace: need N*N+BDSPAC)
0846: *
0847:                   CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
0848:      $                         WORK( IR ), LDWRKR, DUM, 1,
0849:      $                         WORK( IWORK ), INFO )
0850:                   IU = IE + N
0851: *
0852: *                 Multiply Q in A by left singular vectors of R in
0853: *                 WORK(IR), storing result in WORK(IU) and copying to A
0854: *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
0855: *
0856:                   DO 20 I = 1, M, LDWRKU
0857:                      CHUNK = MIN( M-I+1, LDWRKU )
0858:                      CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
0859:      $                           LDA, WORK( IR ), LDWRKR, ZERO,
0860:      $                           WORK( IU ), LDWRKU )
0861:                      CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
0862:      $                            A( I, 1 ), LDA )
0863:    20             CONTINUE
0864: *
0865:                ELSE
0866: *
0867: *                 Insufficient workspace for a fast algorithm
0868: *
0869:                   ITAU = 1
0870:                   IWORK = ITAU + N
0871: *
0872: *                 Compute A=Q*R
0873: *                 (Workspace: need 2*N, prefer N+N*NB)
0874: *
0875:                   CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0876:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0877: *
0878: *                 Copy R to VT, zeroing out below it
0879: *
0880:                   CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
0881:                   IF( N.GT.1 )
0882:      $               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
0883:      $                            VT( 2, 1 ), LDVT )
0884: *
0885: *                 Generate Q in A
0886: *                 (Workspace: need 2*N, prefer N+N*NB)
0887: *
0888:                   CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0889:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0890:                   IE = ITAU
0891:                   ITAUQ = IE + N
0892:                   ITAUP = ITAUQ + N
0893:                   IWORK = ITAUP + N
0894: *
0895: *                 Bidiagonalize R in VT
0896: *                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
0897: *
0898:                   CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
0899:      $                         WORK( ITAUQ ), WORK( ITAUP ),
0900:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0901: *
0902: *                 Multiply Q in A by left vectors bidiagonalizing R
0903: *                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
0904: *
0905:                   CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
0906:      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
0907:      $                         LWORK-IWORK+1, IERR )
0908: *
0909: *                 Generate right vectors bidiagonalizing R in VT
0910: *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
0911: *
0912:                   CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
0913:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
0914:                   IWORK = IE + N
0915: *
0916: *                 Perform bidiagonal QR iteration, computing left
0917: *                 singular vectors of A in A and computing right
0918: *                 singular vectors of A in VT
0919: *                 (Workspace: need BDSPAC)
0920: *
0921:                   CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
0922:      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
0923: *
0924:                END IF
0925: *
0926:             ELSE IF( WNTUS ) THEN
0927: *
0928:                IF( WNTVN ) THEN
0929: *
0930: *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
0931: *                 N left singular vectors to be computed in U and
0932: *                 no right singular vectors to be computed
0933: *
0934:                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
0935: *
0936: *                    Sufficient workspace for a fast algorithm
0937: *
0938:                      IR = 1
0939:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
0940: *
0941: *                       WORK(IR) is LDA by N
0942: *
0943:                         LDWRKR = LDA
0944:                      ELSE
0945: *
0946: *                       WORK(IR) is N by N
0947: *
0948:                         LDWRKR = N
0949:                      END IF
0950:                      ITAU = IR + LDWRKR*N
0951:                      IWORK = ITAU + N
0952: *
0953: *                    Compute A=Q*R
0954: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0955: *
0956:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
0957:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
0958: *
0959: *                    Copy R to WORK(IR), zeroing out below it
0960: *
0961:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
0962:      $                            LDWRKR )
0963:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
0964:      $                            WORK( IR+1 ), LDWRKR )
0965: *
0966: *                    Generate Q in A
0967: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
0968: *
0969:                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
0970:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
0971:                      IE = ITAU
0972:                      ITAUQ = IE + N
0973:                      ITAUP = ITAUQ + N
0974:                      IWORK = ITAUP + N
0975: *
0976: *                    Bidiagonalize R in WORK(IR)
0977: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
0978: *
0979:                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
0980:      $                            WORK( IE ), WORK( ITAUQ ),
0981:      $                            WORK( ITAUP ), WORK( IWORK ),
0982:      $                            LWORK-IWORK+1, IERR )
0983: *
0984: *                    Generate left vectors bidiagonalizing R in WORK(IR)
0985: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
0986: *
0987:                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
0988:      $                            WORK( ITAUQ ), WORK( IWORK ),
0989:      $                            LWORK-IWORK+1, IERR )
0990:                      IWORK = IE + N
0991: *
0992: *                    Perform bidiagonal QR iteration, computing left
0993: *                    singular vectors of R in WORK(IR)
0994: *                    (Workspace: need N*N+BDSPAC)
0995: *
0996:                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
0997:      $                            1, WORK( IR ), LDWRKR, DUM, 1,
0998:      $                            WORK( IWORK ), INFO )
0999: *
1000: *                    Multiply Q in A by left singular vectors of R in
1001: *                    WORK(IR), storing result in U
1002: *                    (Workspace: need N*N)
1003: *
1004:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1005:      $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1006: *
1007:                   ELSE
1008: *
1009: *                    Insufficient workspace for a fast algorithm
1010: *
1011:                      ITAU = 1
1012:                      IWORK = ITAU + N
1013: *
1014: *                    Compute A=Q*R, copying result to U
1015: *                    (Workspace: need 2*N, prefer N+N*NB)
1016: *
1017:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1018:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1019:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1020: *
1021: *                    Generate Q in U
1022: *                    (Workspace: need 2*N, prefer N+N*NB)
1023: *
1024:                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1025:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1026:                      IE = ITAU
1027:                      ITAUQ = IE + N
1028:                      ITAUP = ITAUQ + N
1029:                      IWORK = ITAUP + N
1030: *
1031: *                    Zero out below R in A
1032: *
1033:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1034:      $                            LDA )
1035: *
1036: *                    Bidiagonalize R in A
1037: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1038: *
1039:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1040:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1041:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1042: *
1043: *                    Multiply Q in U by left vectors bidiagonalizing R
1044: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1045: *
1046:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1047:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1048:      $                            LWORK-IWORK+1, IERR )
1049:                      IWORK = IE + N
1050: *
1051: *                    Perform bidiagonal QR iteration, computing left
1052: *                    singular vectors of A in U
1053: *                    (Workspace: need BDSPAC)
1054: *
1055:                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1056:      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1057:      $                            INFO )
1058: *
1059:                   END IF
1060: *
1061:                ELSE IF( WNTVO ) THEN
1062: *
1063: *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1064: *                 N left singular vectors to be computed in U and
1065: *                 N right singular vectors to be overwritten on A
1066: *
1067:                   IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
1068: *
1069: *                    Sufficient workspace for a fast algorithm
1070: *
1071:                      IU = 1
1072:                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1073: *
1074: *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1075: *
1076:                         LDWRKU = LDA
1077:                         IR = IU + LDWRKU*N
1078:                         LDWRKR = LDA
1079:                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1080: *
1081: *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1082: *
1083:                         LDWRKU = LDA
1084:                         IR = IU + LDWRKU*N
1085:                         LDWRKR = N
1086:                      ELSE
1087: *
1088: *                       WORK(IU) is N by N and WORK(IR) is N by N
1089: *
1090:                         LDWRKU = N
1091:                         IR = IU + LDWRKU*N
1092:                         LDWRKR = N
1093:                      END IF
1094:                      ITAU = IR + LDWRKR*N
1095:                      IWORK = ITAU + N
1096: *
1097: *                    Compute A=Q*R
1098: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1099: *
1100:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1101:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1102: *
1103: *                    Copy R to WORK(IU), zeroing out below it
1104: *
1105:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1106:      $                            LDWRKU )
1107:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1108:      $                            WORK( IU+1 ), LDWRKU )
1109: *
1110: *                    Generate Q in A
1111: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1112: *
1113:                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1114:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1115:                      IE = ITAU
1116:                      ITAUQ = IE + N
1117:                      ITAUP = ITAUQ + N
1118:                      IWORK = ITAUP + N
1119: *
1120: *                    Bidiagonalize R in WORK(IU), copying result to
1121: *                    WORK(IR)
1122: *                    (Workspace: need 2*N*N+4*N,
1123: *                                prefer 2*N*N+3*N+2*N*NB)
1124: *
1125:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1126:      $                            WORK( IE ), WORK( ITAUQ ),
1127:      $                            WORK( ITAUP ), WORK( IWORK ),
1128:      $                            LWORK-IWORK+1, IERR )
1129:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1130:      $                            WORK( IR ), LDWRKR )
1131: *
1132: *                    Generate left bidiagonalizing vectors in WORK(IU)
1133: *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1134: *
1135:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1136:      $                            WORK( ITAUQ ), WORK( IWORK ),
1137:      $                            LWORK-IWORK+1, IERR )
1138: *
1139: *                    Generate right bidiagonalizing vectors in WORK(IR)
1140: *                    (Workspace: need 2*N*N+4*N-1,
1141: *                                prefer 2*N*N+3*N+(N-1)*NB)
1142: *
1143:                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1144:      $                            WORK( ITAUP ), WORK( IWORK ),
1145:      $                            LWORK-IWORK+1, IERR )
1146:                      IWORK = IE + N
1147: *
1148: *                    Perform bidiagonal QR iteration, computing left
1149: *                    singular vectors of R in WORK(IU) and computing
1150: *                    right singular vectors of R in WORK(IR)
1151: *                    (Workspace: need 2*N*N+BDSPAC)
1152: *
1153:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1154:      $                            WORK( IR ), LDWRKR, WORK( IU ),
1155:      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1156: *
1157: *                    Multiply Q in A by left singular vectors of R in
1158: *                    WORK(IU), storing result in U
1159: *                    (Workspace: need N*N)
1160: *
1161:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1162:      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1163: *
1164: *                    Copy right singular vectors of R to A
1165: *                    (Workspace: need N*N)
1166: *
1167:                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1168:      $                            LDA )
1169: *
1170:                   ELSE
1171: *
1172: *                    Insufficient workspace for a fast algorithm
1173: *
1174:                      ITAU = 1
1175:                      IWORK = ITAU + N
1176: *
1177: *                    Compute A=Q*R, copying result to U
1178: *                    (Workspace: need 2*N, prefer N+N*NB)
1179: *
1180:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1181:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1182:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1183: *
1184: *                    Generate Q in U
1185: *                    (Workspace: need 2*N, prefer N+N*NB)
1186: *
1187:                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1188:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1189:                      IE = ITAU
1190:                      ITAUQ = IE + N
1191:                      ITAUP = ITAUQ + N
1192:                      IWORK = ITAUP + N
1193: *
1194: *                    Zero out below R in A
1195: *
1196:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1197:      $                            LDA )
1198: *
1199: *                    Bidiagonalize R in A
1200: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1201: *
1202:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1203:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1204:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1205: *
1206: *                    Multiply Q in U by left vectors bidiagonalizing R
1207: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1208: *
1209:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1210:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1211:      $                            LWORK-IWORK+1, IERR )
1212: *
1213: *                    Generate right vectors bidiagonalizing R in A
1214: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1215: *
1216:                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1217:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1218:                      IWORK = IE + N
1219: *
1220: *                    Perform bidiagonal QR iteration, computing left
1221: *                    singular vectors of A in U and computing right
1222: *                    singular vectors of A in A
1223: *                    (Workspace: need BDSPAC)
1224: *
1225:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1226:      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1227:      $                            INFO )
1228: *
1229:                   END IF
1230: *
1231:                ELSE IF( WNTVAS ) THEN
1232: *
1233: *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1234: *                         or 'A')
1235: *                 N left singular vectors to be computed in U and
1236: *                 N right singular vectors to be computed in VT
1237: *
1238:                   IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
1239: *
1240: *                    Sufficient workspace for a fast algorithm
1241: *
1242:                      IU = 1
1243:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1244: *
1245: *                       WORK(IU) is LDA by N
1246: *
1247:                         LDWRKU = LDA
1248:                      ELSE
1249: *
1250: *                       WORK(IU) is N by N
1251: *
1252:                         LDWRKU = N
1253:                      END IF
1254:                      ITAU = IU + LDWRKU*N
1255:                      IWORK = ITAU + N
1256: *
1257: *                    Compute A=Q*R
1258: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1259: *
1260:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1261:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1262: *
1263: *                    Copy R to WORK(IU), zeroing out below it
1264: *
1265:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1266:      $                            LDWRKU )
1267:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1268:      $                            WORK( IU+1 ), LDWRKU )
1269: *
1270: *                    Generate Q in A
1271: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1272: *
1273:                      CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
1274:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1275:                      IE = ITAU
1276:                      ITAUQ = IE + N
1277:                      ITAUP = ITAUQ + N
1278:                      IWORK = ITAUP + N
1279: *
1280: *                    Bidiagonalize R in WORK(IU), copying result to VT
1281: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1282: *
1283:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1284:      $                            WORK( IE ), WORK( ITAUQ ),
1285:      $                            WORK( ITAUP ), WORK( IWORK ),
1286:      $                            LWORK-IWORK+1, IERR )
1287:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1288:      $                            LDVT )
1289: *
1290: *                    Generate left bidiagonalizing vectors in WORK(IU)
1291: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1292: *
1293:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1294:      $                            WORK( ITAUQ ), WORK( IWORK ),
1295:      $                            LWORK-IWORK+1, IERR )
1296: *
1297: *                    Generate right bidiagonalizing vectors in VT
1298: *                    (Workspace: need N*N+4*N-1,
1299: *                                prefer N*N+3*N+(N-1)*NB)
1300: *
1301:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1302:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1303:                      IWORK = IE + N
1304: *
1305: *                    Perform bidiagonal QR iteration, computing left
1306: *                    singular vectors of R in WORK(IU) and computing
1307: *                    right singular vectors of R in VT
1308: *                    (Workspace: need N*N+BDSPAC)
1309: *
1310:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1311:      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1312:      $                            WORK( IWORK ), INFO )
1313: *
1314: *                    Multiply Q in A by left singular vectors of R in
1315: *                    WORK(IU), storing result in U
1316: *                    (Workspace: need N*N)
1317: *
1318:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
1319:      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1320: *
1321:                   ELSE
1322: *
1323: *                    Insufficient workspace for a fast algorithm
1324: *
1325:                      ITAU = 1
1326:                      IWORK = ITAU + N
1327: *
1328: *                    Compute A=Q*R, copying result to U
1329: *                    (Workspace: need 2*N, prefer N+N*NB)
1330: *
1331:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1332:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1333:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1334: *
1335: *                    Generate Q in U
1336: *                    (Workspace: need 2*N, prefer N+N*NB)
1337: *
1338:                      CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
1339:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1340: *
1341: *                    Copy R to VT, zeroing out below it
1342: *
1343:                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1344:                      IF( N.GT.1 )
1345:      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1346:      $                               VT( 2, 1 ), LDVT )
1347:                      IE = ITAU
1348:                      ITAUQ = IE + N
1349:                      ITAUP = ITAUQ + N
1350:                      IWORK = ITAUP + N
1351: *
1352: *                    Bidiagonalize R in VT
1353: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1354: *
1355:                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1356:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1357:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1358: *
1359: *                    Multiply Q in U by left bidiagonalizing vectors
1360: *                    in VT
1361: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1362: *
1363:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1364:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1365:      $                            LWORK-IWORK+1, IERR )
1366: *
1367: *                    Generate right bidiagonalizing vectors in VT
1368: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1369: *
1370:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1371:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1372:                      IWORK = IE + N
1373: *
1374: *                    Perform bidiagonal QR iteration, computing left
1375: *                    singular vectors of A in U and computing right
1376: *                    singular vectors of A in VT
1377: *                    (Workspace: need BDSPAC)
1378: *
1379:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1380:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1381:      $                            INFO )
1382: *
1383:                   END IF
1384: *
1385:                END IF
1386: *
1387:             ELSE IF( WNTUA ) THEN
1388: *
1389:                IF( WNTVN ) THEN
1390: *
1391: *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1392: *                 M left singular vectors to be computed in U and
1393: *                 no right singular vectors to be computed
1394: *
1395:                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1396: *
1397: *                    Sufficient workspace for a fast algorithm
1398: *
1399:                      IR = 1
1400:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1401: *
1402: *                       WORK(IR) is LDA by N
1403: *
1404:                         LDWRKR = LDA
1405:                      ELSE
1406: *
1407: *                       WORK(IR) is N by N
1408: *
1409:                         LDWRKR = N
1410:                      END IF
1411:                      ITAU = IR + LDWRKR*N
1412:                      IWORK = ITAU + N
1413: *
1414: *                    Compute A=Q*R, copying result to U
1415: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1416: *
1417:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1418:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1419:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1420: *
1421: *                    Copy R to WORK(IR), zeroing out below it
1422: *
1423:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
1424:      $                            LDWRKR )
1425:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1426:      $                            WORK( IR+1 ), LDWRKR )
1427: *
1428: *                    Generate Q in U
1429: *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1430: *
1431:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1432:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1433:                      IE = ITAU
1434:                      ITAUQ = IE + N
1435:                      ITAUP = ITAUQ + N
1436:                      IWORK = ITAUP + N
1437: *
1438: *                    Bidiagonalize R in WORK(IR)
1439: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1440: *
1441:                      CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
1442:      $                            WORK( IE ), WORK( ITAUQ ),
1443:      $                            WORK( ITAUP ), WORK( IWORK ),
1444:      $                            LWORK-IWORK+1, IERR )
1445: *
1446: *                    Generate left bidiagonalizing vectors in WORK(IR)
1447: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1448: *
1449:                      CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1450:      $                            WORK( ITAUQ ), WORK( IWORK ),
1451:      $                            LWORK-IWORK+1, IERR )
1452:                      IWORK = IE + N
1453: *
1454: *                    Perform bidiagonal QR iteration, computing left
1455: *                    singular vectors of R in WORK(IR)
1456: *                    (Workspace: need N*N+BDSPAC)
1457: *
1458:                      CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1459:      $                            1, WORK( IR ), LDWRKR, DUM, 1,
1460:      $                            WORK( IWORK ), INFO )
1461: *
1462: *                    Multiply Q in U by left singular vectors of R in
1463: *                    WORK(IR), storing result in A
1464: *                    (Workspace: need N*N)
1465: *
1466:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1467:      $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1468: *
1469: *                    Copy left singular vectors of A from A to U
1470: *
1471:                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1472: *
1473:                   ELSE
1474: *
1475: *                    Insufficient workspace for a fast algorithm
1476: *
1477:                      ITAU = 1
1478:                      IWORK = ITAU + N
1479: *
1480: *                    Compute A=Q*R, copying result to U
1481: *                    (Workspace: need 2*N, prefer N+N*NB)
1482: *
1483:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1484:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1485:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1486: *
1487: *                    Generate Q in U
1488: *                    (Workspace: need N+M, prefer N+M*NB)
1489: *
1490:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1491:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1492:                      IE = ITAU
1493:                      ITAUQ = IE + N
1494:                      ITAUP = ITAUQ + N
1495:                      IWORK = ITAUP + N
1496: *
1497: *                    Zero out below R in A
1498: *
1499:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1500:      $                            LDA )
1501: *
1502: *                    Bidiagonalize R in A
1503: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1504: *
1505:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1506:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1507:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1508: *
1509: *                    Multiply Q in U by left bidiagonalizing vectors
1510: *                    in A
1511: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1512: *
1513:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1514:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1515:      $                            LWORK-IWORK+1, IERR )
1516:                      IWORK = IE + N
1517: *
1518: *                    Perform bidiagonal QR iteration, computing left
1519: *                    singular vectors of A in U
1520: *                    (Workspace: need BDSPAC)
1521: *
1522:                      CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1523:      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1524:      $                            INFO )
1525: *
1526:                   END IF
1527: *
1528:                ELSE IF( WNTVO ) THEN
1529: *
1530: *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1531: *                 M left singular vectors to be computed in U and
1532: *                 N right singular vectors to be overwritten on A
1533: *
1534:                   IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1535: *
1536: *                    Sufficient workspace for a fast algorithm
1537: *
1538:                      IU = 1
1539:                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1540: *
1541: *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1542: *
1543:                         LDWRKU = LDA
1544:                         IR = IU + LDWRKU*N
1545:                         LDWRKR = LDA
1546:                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1547: *
1548: *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1549: *
1550:                         LDWRKU = LDA
1551:                         IR = IU + LDWRKU*N
1552:                         LDWRKR = N
1553:                      ELSE
1554: *
1555: *                       WORK(IU) is N by N and WORK(IR) is N by N
1556: *
1557:                         LDWRKU = N
1558:                         IR = IU + LDWRKU*N
1559:                         LDWRKR = N
1560:                      END IF
1561:                      ITAU = IR + LDWRKR*N
1562:                      IWORK = ITAU + N
1563: *
1564: *                    Compute A=Q*R, copying result to U
1565: *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1566: *
1567:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1568:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1569:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1570: *
1571: *                    Generate Q in U
1572: *                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1573: *
1574:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1575:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1576: *
1577: *                    Copy R to WORK(IU), zeroing out below it
1578: *
1579:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1580:      $                            LDWRKU )
1581:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1582:      $                            WORK( IU+1 ), LDWRKU )
1583:                      IE = ITAU
1584:                      ITAUQ = IE + N
1585:                      ITAUP = ITAUQ + N
1586:                      IWORK = ITAUP + N
1587: *
1588: *                    Bidiagonalize R in WORK(IU), copying result to
1589: *                    WORK(IR)
1590: *                    (Workspace: need 2*N*N+4*N,
1591: *                                prefer 2*N*N+3*N+2*N*NB)
1592: *
1593:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1594:      $                            WORK( IE ), WORK( ITAUQ ),
1595:      $                            WORK( ITAUP ), WORK( IWORK ),
1596:      $                            LWORK-IWORK+1, IERR )
1597:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1598:      $                            WORK( IR ), LDWRKR )
1599: *
1600: *                    Generate left bidiagonalizing vectors in WORK(IU)
1601: *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1602: *
1603:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1604:      $                            WORK( ITAUQ ), WORK( IWORK ),
1605:      $                            LWORK-IWORK+1, IERR )
1606: *
1607: *                    Generate right bidiagonalizing vectors in WORK(IR)
1608: *                    (Workspace: need 2*N*N+4*N-1,
1609: *                                prefer 2*N*N+3*N+(N-1)*NB)
1610: *
1611:                      CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1612:      $                            WORK( ITAUP ), WORK( IWORK ),
1613:      $                            LWORK-IWORK+1, IERR )
1614:                      IWORK = IE + N
1615: *
1616: *                    Perform bidiagonal QR iteration, computing left
1617: *                    singular vectors of R in WORK(IU) and computing
1618: *                    right singular vectors of R in WORK(IR)
1619: *                    (Workspace: need 2*N*N+BDSPAC)
1620: *
1621:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1622:      $                            WORK( IR ), LDWRKR, WORK( IU ),
1623:      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1624: *
1625: *                    Multiply Q in U by left singular vectors of R in
1626: *                    WORK(IU), storing result in A
1627: *                    (Workspace: need N*N)
1628: *
1629:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1630:      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1631: *
1632: *                    Copy left singular vectors of A from A to U
1633: *
1634:                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1635: *
1636: *                    Copy right singular vectors of R from WORK(IR) to A
1637: *
1638:                      CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1639:      $                            LDA )
1640: *
1641:                   ELSE
1642: *
1643: *                    Insufficient workspace for a fast algorithm
1644: *
1645:                      ITAU = 1
1646:                      IWORK = ITAU + N
1647: *
1648: *                    Compute A=Q*R, copying result to U
1649: *                    (Workspace: need 2*N, prefer N+N*NB)
1650: *
1651:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1652:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1653:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1654: *
1655: *                    Generate Q in U
1656: *                    (Workspace: need N+M, prefer N+M*NB)
1657: *
1658:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1659:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1660:                      IE = ITAU
1661:                      ITAUQ = IE + N
1662:                      ITAUP = ITAUQ + N
1663:                      IWORK = ITAUP + N
1664: *
1665: *                    Zero out below R in A
1666: *
1667:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
1668:      $                            LDA )
1669: *
1670: *                    Bidiagonalize R in A
1671: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1672: *
1673:                      CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
1674:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1675:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1676: *
1677: *                    Multiply Q in U by left bidiagonalizing vectors
1678: *                    in A
1679: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1680: *
1681:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1682:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1683:      $                            LWORK-IWORK+1, IERR )
1684: *
1685: *                    Generate right bidiagonalizing vectors in A
1686: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1687: *
1688:                      CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1689:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1690:                      IWORK = IE + N
1691: *
1692: *                    Perform bidiagonal QR iteration, computing left
1693: *                    singular vectors of A in U and computing right
1694: *                    singular vectors of A in A
1695: *                    (Workspace: need BDSPAC)
1696: *
1697:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1698:      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1699:      $                            INFO )
1700: *
1701:                   END IF
1702: *
1703:                ELSE IF( WNTVAS ) THEN
1704: *
1705: *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1706: *                         or 'A')
1707: *                 M left singular vectors to be computed in U and
1708: *                 N right singular vectors to be computed in VT
1709: *
1710:                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1711: *
1712: *                    Sufficient workspace for a fast algorithm
1713: *
1714:                      IU = 1
1715:                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1716: *
1717: *                       WORK(IU) is LDA by N
1718: *
1719:                         LDWRKU = LDA
1720:                      ELSE
1721: *
1722: *                       WORK(IU) is N by N
1723: *
1724:                         LDWRKU = N
1725:                      END IF
1726:                      ITAU = IU + LDWRKU*N
1727:                      IWORK = ITAU + N
1728: *
1729: *                    Compute A=Q*R, copying result to U
1730: *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1731: *
1732:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1733:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1734:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1735: *
1736: *                    Generate Q in U
1737: *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1738: *
1739:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1740:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1741: *
1742: *                    Copy R to WORK(IU), zeroing out below it
1743: *
1744:                      CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
1745:      $                            LDWRKU )
1746:                      CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1747:      $                            WORK( IU+1 ), LDWRKU )
1748:                      IE = ITAU
1749:                      ITAUQ = IE + N
1750:                      ITAUP = ITAUQ + N
1751:                      IWORK = ITAUP + N
1752: *
1753: *                    Bidiagonalize R in WORK(IU), copying result to VT
1754: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1755: *
1756:                      CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
1757:      $                            WORK( IE ), WORK( ITAUQ ),
1758:      $                            WORK( ITAUP ), WORK( IWORK ),
1759:      $                            LWORK-IWORK+1, IERR )
1760:                      CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1761:      $                            LDVT )
1762: *
1763: *                    Generate left bidiagonalizing vectors in WORK(IU)
1764: *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1765: *
1766:                      CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1767:      $                            WORK( ITAUQ ), WORK( IWORK ),
1768:      $                            LWORK-IWORK+1, IERR )
1769: *
1770: *                    Generate right bidiagonalizing vectors in VT
1771: *                    (Workspace: need N*N+4*N-1,
1772: *                                prefer N*N+3*N+(N-1)*NB)
1773: *
1774:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1775:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1776:                      IWORK = IE + N
1777: *
1778: *                    Perform bidiagonal QR iteration, computing left
1779: *                    singular vectors of R in WORK(IU) and computing
1780: *                    right singular vectors of R in VT
1781: *                    (Workspace: need N*N+BDSPAC)
1782: *
1783:                      CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1784:      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1785:      $                            WORK( IWORK ), INFO )
1786: *
1787: *                    Multiply Q in U by left singular vectors of R in
1788: *                    WORK(IU), storing result in A
1789: *                    (Workspace: need N*N)
1790: *
1791:                      CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
1792:      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1793: *
1794: *                    Copy left singular vectors of A from A to U
1795: *
1796:                      CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
1797: *
1798:                   ELSE
1799: *
1800: *                    Insufficient workspace for a fast algorithm
1801: *
1802:                      ITAU = 1
1803:                      IWORK = ITAU + N
1804: *
1805: *                    Compute A=Q*R, copying result to U
1806: *                    (Workspace: need 2*N, prefer N+N*NB)
1807: *
1808:                      CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
1809:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1810:                      CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1811: *
1812: *                    Generate Q in U
1813: *                    (Workspace: need N+M, prefer N+M*NB)
1814: *
1815:                      CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
1816:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1817: *
1818: *                    Copy R from A to VT, zeroing out below it
1819: *
1820:                      CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1821:                      IF( N.GT.1 )
1822:      $                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
1823:      $                               VT( 2, 1 ), LDVT )
1824:                      IE = ITAU
1825:                      ITAUQ = IE + N
1826:                      ITAUP = ITAUQ + N
1827:                      IWORK = ITAUP + N
1828: *
1829: *                    Bidiagonalize R in VT
1830: *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1831: *
1832:                      CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1833:      $                            WORK( ITAUQ ), WORK( ITAUP ),
1834:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1835: *
1836: *                    Multiply Q in U by left bidiagonalizing vectors
1837: *                    in VT
1838: *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1839: *
1840:                      CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1841:      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1842:      $                            LWORK-IWORK+1, IERR )
1843: *
1844: *                    Generate right bidiagonalizing vectors in VT
1845: *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1846: *
1847:                      CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1848:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1849:                      IWORK = IE + N
1850: *
1851: *                    Perform bidiagonal QR iteration, computing left
1852: *                    singular vectors of A in U and computing right
1853: *                    singular vectors of A in VT
1854: *                    (Workspace: need BDSPAC)
1855: *
1856:                      CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1857:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1858:      $                            INFO )
1859: *
1860:                   END IF
1861: *
1862:                END IF
1863: *
1864:             END IF
1865: *
1866:          ELSE
1867: *
1868: *           M .LT. MNTHR
1869: *
1870: *           Path 10 (M at least N, but not much larger)
1871: *           Reduce to bidiagonal form without QR decomposition
1872: *
1873:             IE = 1
1874:             ITAUQ = IE + N
1875:             ITAUP = ITAUQ + N
1876:             IWORK = ITAUP + N
1877: *
1878: *           Bidiagonalize A
1879: *           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1880: *
1881:             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1882:      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1883:      $                   IERR )
1884:             IF( WNTUAS ) THEN
1885: *
1886: *              If left singular vectors desired in U, copy result to U
1887: *              and generate left bidiagonalizing vectors in U
1888: *              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1889: *
1890:                CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
1891:                IF( WNTUS )
1892:      $            NCU = N
1893:                IF( WNTUA )
1894:      $            NCU = M
1895:                CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1896:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1897:             END IF
1898:             IF( WNTVAS ) THEN
1899: *
1900: *              If right singular vectors desired in VT, copy result to
1901: *              VT and generate right bidiagonalizing vectors in VT
1902: *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1903: *
1904:                CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
1905:                CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1906:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1907:             END IF
1908:             IF( WNTUO ) THEN
1909: *
1910: *              If left singular vectors desired in A, generate left
1911: *              bidiagonalizing vectors in A
1912: *              (Workspace: need 4*N, prefer 3*N+N*NB)
1913: *
1914:                CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1915:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1916:             END IF
1917:             IF( WNTVO ) THEN
1918: *
1919: *              If right singular vectors desired in A, generate right
1920: *              bidiagonalizing vectors in A
1921: *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1922: *
1923:                CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1924:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1925:             END IF
1926:             IWORK = IE + N
1927:             IF( WNTUAS .OR. WNTUO )
1928:      $         NRU = M
1929:             IF( WNTUN )
1930:      $         NRU = 0
1931:             IF( WNTVAS .OR. WNTVO )
1932:      $         NCVT = N
1933:             IF( WNTVN )
1934:      $         NCVT = 0
1935:             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1936: *
1937: *              Perform bidiagonal QR iteration, if desired, computing
1938: *              left singular vectors in U and computing right singular
1939: *              vectors in VT
1940: *              (Workspace: need BDSPAC)
1941: *
1942:                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1943:      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1944:             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1945: *
1946: *              Perform bidiagonal QR iteration, if desired, computing
1947: *              left singular vectors in U and computing right singular
1948: *              vectors in A
1949: *              (Workspace: need BDSPAC)
1950: *
1951:                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1952:      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
1953:             ELSE
1954: *
1955: *              Perform bidiagonal QR iteration, if desired, computing
1956: *              left singular vectors in A and computing right singular
1957: *              vectors in VT
1958: *              (Workspace: need BDSPAC)
1959: *
1960:                CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1961:      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1962:             END IF
1963: *
1964:          END IF
1965: *
1966:       ELSE
1967: *
1968: *        A has more columns than rows. If A has sufficiently more
1969: *        columns than rows, first reduce using the LQ decomposition (if
1970: *        sufficient workspace available)
1971: *
1972:          IF( N.GE.MNTHR ) THEN
1973: *
1974:             IF( WNTVN ) THEN
1975: *
1976: *              Path 1t(N much larger than M, JOBVT='N')
1977: *              No right singular vectors to be computed
1978: *
1979:                ITAU = 1
1980:                IWORK = ITAU + M
1981: *
1982: *              Compute A=L*Q
1983: *              (Workspace: need 2*M, prefer M+M*NB)
1984: *
1985:                CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1986:      $                      LWORK-IWORK+1, IERR )
1987: *
1988: *              Zero out above L
1989: *
1990:                CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
1991:                IE = 1
1992:                ITAUQ = IE + M
1993:                ITAUP = ITAUQ + M
1994:                IWORK = ITAUP + M
1995: *
1996: *              Bidiagonalize L in A
1997: *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
1998: *
1999:                CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2000:      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2001:      $                      IERR )
2002:                IF( WNTUO .OR. WNTUAS ) THEN
2003: *
2004: *                 If left singular vectors desired, generate Q
2005: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2006: *
2007:                   CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2008:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2009:                END IF
2010:                IWORK = IE + M
2011:                NRU = 0
2012:                IF( WNTUO .OR. WNTUAS )
2013:      $            NRU = M
2014: *
2015: *              Perform bidiagonal QR iteration, computing left singular
2016: *              vectors of A in A if desired
2017: *              (Workspace: need BDSPAC)
2018: *
2019:                CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2020:      $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2021: *
2022: *              If left singular vectors desired in U, copy them there
2023: *
2024:                IF( WNTUAS )
2025:      $            CALL SLACPY( 'F', M, M, A, LDA, U, LDU )
2026: *
2027:             ELSE IF( WNTVO .AND. WNTUN ) THEN
2028: *
2029: *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2030: *              M right singular vectors to be overwritten on A and
2031: *              no left singular vectors to be computed
2032: *
2033:                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2034: *
2035: *                 Sufficient workspace for a fast algorithm
2036: *
2037:                   IR = 1
2038:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2039: *
2040: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2041: *
2042:                      LDWRKU = LDA
2043:                      CHUNK = N
2044:                      LDWRKR = LDA
2045:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2046: *
2047: *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2048: *
2049:                      LDWRKU = LDA
2050:                      CHUNK = N
2051:                      LDWRKR = M
2052:                   ELSE
2053: *
2054: *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2055: *
2056:                      LDWRKU = M
2057:                      CHUNK = ( LWORK-M*M-M ) / M
2058:                      LDWRKR = M
2059:                   END IF
2060:                   ITAU = IR + LDWRKR*M
2061:                   IWORK = ITAU + M
2062: *
2063: *                 Compute A=L*Q
2064: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2065: *
2066:                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2067:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2068: *
2069: *                 Copy L to WORK(IR) and zero out above it
2070: *
2071:                   CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2072:                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2073:      $                         WORK( IR+LDWRKR ), LDWRKR )
2074: *
2075: *                 Generate Q in A
2076: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2077: *
2078:                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2079:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2080:                   IE = ITAU
2081:                   ITAUQ = IE + M
2082:                   ITAUP = ITAUQ + M
2083:                   IWORK = ITAUP + M
2084: *
2085: *                 Bidiagonalize L in WORK(IR)
2086: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2087: *
2088:                   CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2089:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2090:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2091: *
2092: *                 Generate right vectors bidiagonalizing L
2093: *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2094: *
2095:                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2096:      $                         WORK( ITAUP ), WORK( IWORK ),
2097:      $                         LWORK-IWORK+1, IERR )
2098:                   IWORK = IE + M
2099: *
2100: *                 Perform bidiagonal QR iteration, computing right
2101: *                 singular vectors of L in WORK(IR)
2102: *                 (Workspace: need M*M+BDSPAC)
2103: *
2104:                   CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2105:      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2106:      $                         WORK( IWORK ), INFO )
2107:                   IU = IE + M
2108: *
2109: *                 Multiply right singular vectors of L in WORK(IR) by Q
2110: *                 in A, storing result in WORK(IU) and copying to A
2111: *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2112: *
2113:                   DO 30 I = 1, N, CHUNK
2114:                      BLK = MIN( N-I+1, CHUNK )
2115:                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2116:      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2117:      $                           WORK( IU ), LDWRKU )
2118:                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2119:      $                            A( 1, I ), LDA )
2120:    30             CONTINUE
2121: *
2122:                ELSE
2123: *
2124: *                 Insufficient workspace for a fast algorithm
2125: *
2126:                   IE = 1
2127:                   ITAUQ = IE + M
2128:                   ITAUP = ITAUQ + M
2129:                   IWORK = ITAUP + M
2130: *
2131: *                 Bidiagonalize A
2132: *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2133: *
2134:                   CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
2135:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2136:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2137: *
2138: *                 Generate right vectors bidiagonalizing A
2139: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2140: *
2141:                   CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2142:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2143:                   IWORK = IE + M
2144: *
2145: *                 Perform bidiagonal QR iteration, computing right
2146: *                 singular vectors of A in A
2147: *                 (Workspace: need BDSPAC)
2148: *
2149:                   CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
2150:      $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2151: *
2152:                END IF
2153: *
2154:             ELSE IF( WNTVO .AND. WNTUAS ) THEN
2155: *
2156: *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2157: *              M right singular vectors to be overwritten on A and
2158: *              M left singular vectors to be computed in U
2159: *
2160:                IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2161: *
2162: *                 Sufficient workspace for a fast algorithm
2163: *
2164:                   IR = 1
2165:                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2166: *
2167: *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2168: *
2169:                      LDWRKU = LDA
2170:                      CHUNK = N
2171:                      LDWRKR = LDA
2172:                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2173: *
2174: *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2175: *
2176:                      LDWRKU = LDA
2177:                      CHUNK = N
2178:                      LDWRKR = M
2179:                   ELSE
2180: *
2181: *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2182: *
2183:                      LDWRKU = M
2184:                      CHUNK = ( LWORK-M*M-M ) / M
2185:                      LDWRKR = M
2186:                   END IF
2187:                   ITAU = IR + LDWRKR*M
2188:                   IWORK = ITAU + M
2189: *
2190: *                 Compute A=L*Q
2191: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2192: *
2193:                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2194:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2195: *
2196: *                 Copy L to U, zeroing about above it
2197: *
2198:                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2199:                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2200:      $                         LDU )
2201: *
2202: *                 Generate Q in A
2203: *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2204: *
2205:                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2206:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2207:                   IE = ITAU
2208:                   ITAUQ = IE + M
2209:                   ITAUP = ITAUQ + M
2210:                   IWORK = ITAUP + M
2211: *
2212: *                 Bidiagonalize L in U, copying result to WORK(IR)
2213: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2214: *
2215:                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2216:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2217:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2218:                   CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2219: *
2220: *                 Generate right vectors bidiagonalizing L in WORK(IR)
2221: *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2222: *
2223:                   CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2224:      $                         WORK( ITAUP ), WORK( IWORK ),
2225:      $                         LWORK-IWORK+1, IERR )
2226: *
2227: *                 Generate left vectors bidiagonalizing L in U
2228: *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2229: *
2230:                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2231:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2232:                   IWORK = IE + M
2233: *
2234: *                 Perform bidiagonal QR iteration, computing left
2235: *                 singular vectors of L in U, and computing right
2236: *                 singular vectors of L in WORK(IR)
2237: *                 (Workspace: need M*M+BDSPAC)
2238: *
2239:                   CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2240:      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2241:      $                         WORK( IWORK ), INFO )
2242:                   IU = IE + M
2243: *
2244: *                 Multiply right singular vectors of L in WORK(IR) by Q
2245: *                 in A, storing result in WORK(IU) and copying to A
2246: *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2247: *
2248:                   DO 40 I = 1, N, CHUNK
2249:                      BLK = MIN( N-I+1, CHUNK )
2250:                      CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
2251:      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2252:      $                           WORK( IU ), LDWRKU )
2253:                      CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2254:      $                            A( 1, I ), LDA )
2255:    40             CONTINUE
2256: *
2257:                ELSE
2258: *
2259: *                 Insufficient workspace for a fast algorithm
2260: *
2261:                   ITAU = 1
2262:                   IWORK = ITAU + M
2263: *
2264: *                 Compute A=L*Q
2265: *                 (Workspace: need 2*M, prefer M+M*NB)
2266: *
2267:                   CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2268:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2269: *
2270: *                 Copy L to U, zeroing out above it
2271: *
2272:                   CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2273:                   CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2274:      $                         LDU )
2275: *
2276: *                 Generate Q in A
2277: *                 (Workspace: need 2*M, prefer M+M*NB)
2278: *
2279:                   CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2280:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2281:                   IE = ITAU
2282:                   ITAUQ = IE + M
2283:                   ITAUP = ITAUQ + M
2284:                   IWORK = ITAUP + M
2285: *
2286: *                 Bidiagonalize L in U
2287: *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2288: *
2289:                   CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2290:      $                         WORK( ITAUQ ), WORK( ITAUP ),
2291:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2292: *
2293: *                 Multiply right vectors bidiagonalizing L by Q in A
2294: *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2295: *
2296:                   CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2297:      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2298:      $                         LWORK-IWORK+1, IERR )
2299: *
2300: *                 Generate left vectors bidiagonalizing L in U
2301: *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2302: *
2303:                   CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2304:      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2305:                   IWORK = IE + M
2306: *
2307: *                 Perform bidiagonal QR iteration, computing left
2308: *                 singular vectors of A in U and computing right
2309: *                 singular vectors of A in A
2310: *                 (Workspace: need BDSPAC)
2311: *
2312:                   CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2313:      $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2314: *
2315:                END IF
2316: *
2317:             ELSE IF( WNTVS ) THEN
2318: *
2319:                IF( WNTUN ) THEN
2320: *
2321: *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2322: *                 M right singular vectors to be computed in VT and
2323: *                 no left singular vectors to be computed
2324: *
2325:                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2326: *
2327: *                    Sufficient workspace for a fast algorithm
2328: *
2329:                      IR = 1
2330:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2331: *
2332: *                       WORK(IR) is LDA by M
2333: *
2334:                         LDWRKR = LDA
2335:                      ELSE
2336: *
2337: *                       WORK(IR) is M by M
2338: *
2339:                         LDWRKR = M
2340:                      END IF
2341:                      ITAU = IR + LDWRKR*M
2342:                      IWORK = ITAU + M
2343: *
2344: *                    Compute A=L*Q
2345: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2346: *
2347:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2348:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2349: *
2350: *                    Copy L to WORK(IR), zeroing out above it
2351: *
2352:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
2353:      $                            LDWRKR )
2354:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2355:      $                            WORK( IR+LDWRKR ), LDWRKR )
2356: *
2357: *                    Generate Q in A
2358: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2359: *
2360:                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2361:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2362:                      IE = ITAU
2363:                      ITAUQ = IE + M
2364:                      ITAUP = ITAUQ + M
2365:                      IWORK = ITAUP + M
2366: *
2367: *                    Bidiagonalize L in WORK(IR)
2368: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2369: *
2370:                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
2371:      $                            WORK( IE ), WORK( ITAUQ ),
2372:      $                            WORK( ITAUP ), WORK( IWORK ),
2373:      $                            LWORK-IWORK+1, IERR )
2374: *
2375: *                    Generate right vectors bidiagonalizing L in
2376: *                    WORK(IR)
2377: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2378: *
2379:                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2380:      $                            WORK( ITAUP ), WORK( IWORK ),
2381:      $                            LWORK-IWORK+1, IERR )
2382:                      IWORK = IE + M
2383: *
2384: *                    Perform bidiagonal QR iteration, computing right
2385: *                    singular vectors of L in WORK(IR)
2386: *                    (Workspace: need M*M+BDSPAC)
2387: *
2388:                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2389:      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2390:      $                            WORK( IWORK ), INFO )
2391: *
2392: *                    Multiply right singular vectors of L in WORK(IR) by
2393: *                    Q in A, storing result in VT
2394: *                    (Workspace: need M*M)
2395: *
2396:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2397:      $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2398: *
2399:                   ELSE
2400: *
2401: *                    Insufficient workspace for a fast algorithm
2402: *
2403:                      ITAU = 1
2404:                      IWORK = ITAU + M
2405: *
2406: *                    Compute A=L*Q
2407: *                    (Workspace: need 2*M, prefer M+M*NB)
2408: *
2409:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2410:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2411: *
2412: *                    Copy result to VT
2413: *
2414:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2415: *
2416: *                    Generate Q in VT
2417: *                    (Workspace: need 2*M, prefer M+M*NB)
2418: *
2419:                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2420:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2421:                      IE = ITAU
2422:                      ITAUQ = IE + M
2423:                      ITAUP = ITAUQ + M
2424:                      IWORK = ITAUP + M
2425: *
2426: *                    Zero out above L in A
2427: *
2428:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2429:      $                            LDA )
2430: *
2431: *                    Bidiagonalize L in A
2432: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2433: *
2434:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2435:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2436:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2437: *
2438: *                    Multiply right vectors bidiagonalizing L by Q in VT
2439: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2440: *
2441:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2442:      $                            WORK( ITAUP ), VT, LDVT,
2443:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2444:                      IWORK = IE + M
2445: *
2446: *                    Perform bidiagonal QR iteration, computing right
2447: *                    singular vectors of A in VT
2448: *                    (Workspace: need BDSPAC)
2449: *
2450:                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2451:      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2452:      $                            INFO )
2453: *
2454:                   END IF
2455: *
2456:                ELSE IF( WNTUO ) THEN
2457: *
2458: *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2459: *                 M right singular vectors to be computed in VT and
2460: *                 M left singular vectors to be overwritten on A
2461: *
2462:                   IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
2463: *
2464: *                    Sufficient workspace for a fast algorithm
2465: *
2466:                      IU = 1
2467:                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2468: *
2469: *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2470: *
2471:                         LDWRKU = LDA
2472:                         IR = IU + LDWRKU*M
2473:                         LDWRKR = LDA
2474:                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2475: *
2476: *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2477: *
2478:                         LDWRKU = LDA
2479:                         IR = IU + LDWRKU*M
2480:                         LDWRKR = M
2481:                      ELSE
2482: *
2483: *                       WORK(IU) is M by M and WORK(IR) is M by M
2484: *
2485:                         LDWRKU = M
2486:                         IR = IU + LDWRKU*M
2487:                         LDWRKR = M
2488:                      END IF
2489:                      ITAU = IR + LDWRKR*M
2490:                      IWORK = ITAU + M
2491: *
2492: *                    Compute A=L*Q
2493: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2494: *
2495:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2496:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2497: *
2498: *                    Copy L to WORK(IU), zeroing out below it
2499: *
2500:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2501:      $                            LDWRKU )
2502:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2503:      $                            WORK( IU+LDWRKU ), LDWRKU )
2504: *
2505: *                    Generate Q in A
2506: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2507: *
2508:                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2509:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2510:                      IE = ITAU
2511:                      ITAUQ = IE + M
2512:                      ITAUP = ITAUQ + M
2513:                      IWORK = ITAUP + M
2514: *
2515: *                    Bidiagonalize L in WORK(IU), copying result to
2516: *                    WORK(IR)
2517: *                    (Workspace: need 2*M*M+4*M,
2518: *                                prefer 2*M*M+3*M+2*M*NB)
2519: *
2520:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2521:      $                            WORK( IE ), WORK( ITAUQ ),
2522:      $                            WORK( ITAUP ), WORK( IWORK ),
2523:      $                            LWORK-IWORK+1, IERR )
2524:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2525:      $                            WORK( IR ), LDWRKR )
2526: *
2527: *                    Generate right bidiagonalizing vectors in WORK(IU)
2528: *                    (Workspace: need 2*M*M+4*M-1,
2529: *                                prefer 2*M*M+3*M+(M-1)*NB)
2530: *
2531:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2532:      $                            WORK( ITAUP ), WORK( IWORK ),
2533:      $                            LWORK-IWORK+1, IERR )
2534: *
2535: *                    Generate left bidiagonalizing vectors in WORK(IR)
2536: *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2537: *
2538:                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2539:      $                            WORK( ITAUQ ), WORK( IWORK ),
2540:      $                            LWORK-IWORK+1, IERR )
2541:                      IWORK = IE + M
2542: *
2543: *                    Perform bidiagonal QR iteration, computing left
2544: *                    singular vectors of L in WORK(IR) and computing
2545: *                    right singular vectors of L in WORK(IU)
2546: *                    (Workspace: need 2*M*M+BDSPAC)
2547: *
2548:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2549:      $                            WORK( IU ), LDWRKU, WORK( IR ),
2550:      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2551: *
2552: *                    Multiply right singular vectors of L in WORK(IU) by
2553: *                    Q in A, storing result in VT
2554: *                    (Workspace: need M*M)
2555: *
2556:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2557:      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2558: *
2559: *                    Copy left singular vectors of L to A
2560: *                    (Workspace: need M*M)
2561: *
2562:                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2563:      $                            LDA )
2564: *
2565:                   ELSE
2566: *
2567: *                    Insufficient workspace for a fast algorithm
2568: *
2569:                      ITAU = 1
2570:                      IWORK = ITAU + M
2571: *
2572: *                    Compute A=L*Q, copying result to VT
2573: *                    (Workspace: need 2*M, prefer M+M*NB)
2574: *
2575:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2576:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2577:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2578: *
2579: *                    Generate Q in VT
2580: *                    (Workspace: need 2*M, prefer M+M*NB)
2581: *
2582:                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2583:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2584:                      IE = ITAU
2585:                      ITAUQ = IE + M
2586:                      ITAUP = ITAUQ + M
2587:                      IWORK = ITAUP + M
2588: *
2589: *                    Zero out above L in A
2590: *
2591:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2592:      $                            LDA )
2593: *
2594: *                    Bidiagonalize L in A
2595: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2596: *
2597:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2598:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2599:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2600: *
2601: *                    Multiply right vectors bidiagonalizing L by Q in VT
2602: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2603: *
2604:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2605:      $                            WORK( ITAUP ), VT, LDVT,
2606:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2607: *
2608: *                    Generate left bidiagonalizing vectors of L in A
2609: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2610: *
2611:                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2612:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2613:                      IWORK = IE + M
2614: *
2615: *                    Perform bidiagonal QR iteration, compute left
2616: *                    singular vectors of A in A and compute right
2617: *                    singular vectors of A in VT
2618: *                    (Workspace: need BDSPAC)
2619: *
2620:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2621:      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2622:      $                            INFO )
2623: *
2624:                   END IF
2625: *
2626:                ELSE IF( WNTUAS ) THEN
2627: *
2628: *                 Path 6t(N much larger than M, JOBU='S' or 'A',
2629: *                         JOBVT='S')
2630: *                 M right singular vectors to be computed in VT and
2631: *                 M left singular vectors to be computed in U
2632: *
2633:                   IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
2634: *
2635: *                    Sufficient workspace for a fast algorithm
2636: *
2637:                      IU = 1
2638:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2639: *
2640: *                       WORK(IU) is LDA by N
2641: *
2642:                         LDWRKU = LDA
2643:                      ELSE
2644: *
2645: *                       WORK(IU) is LDA by M
2646: *
2647:                         LDWRKU = M
2648:                      END IF
2649:                      ITAU = IU + LDWRKU*M
2650:                      IWORK = ITAU + M
2651: *
2652: *                    Compute A=L*Q
2653: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2654: *
2655:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2656:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2657: *
2658: *                    Copy L to WORK(IU), zeroing out above it
2659: *
2660:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2661:      $                            LDWRKU )
2662:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2663:      $                            WORK( IU+LDWRKU ), LDWRKU )
2664: *
2665: *                    Generate Q in A
2666: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2667: *
2668:                      CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2669:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2670:                      IE = ITAU
2671:                      ITAUQ = IE + M
2672:                      ITAUP = ITAUQ + M
2673:                      IWORK = ITAUP + M
2674: *
2675: *                    Bidiagonalize L in WORK(IU), copying result to U
2676: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2677: *
2678:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2679:      $                            WORK( IE ), WORK( ITAUQ ),
2680:      $                            WORK( ITAUP ), WORK( IWORK ),
2681:      $                            LWORK-IWORK+1, IERR )
2682:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2683:      $                            LDU )
2684: *
2685: *                    Generate right bidiagonalizing vectors in WORK(IU)
2686: *                    (Workspace: need M*M+4*M-1,
2687: *                                prefer M*M+3*M+(M-1)*NB)
2688: *
2689:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2690:      $                            WORK( ITAUP ), WORK( IWORK ),
2691:      $                            LWORK-IWORK+1, IERR )
2692: *
2693: *                    Generate left bidiagonalizing vectors in U
2694: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2695: *
2696:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2697:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2698:                      IWORK = IE + M
2699: *
2700: *                    Perform bidiagonal QR iteration, computing left
2701: *                    singular vectors of L in U and computing right
2702: *                    singular vectors of L in WORK(IU)
2703: *                    (Workspace: need M*M+BDSPAC)
2704: *
2705:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2706:      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2707:      $                            WORK( IWORK ), INFO )
2708: *
2709: *                    Multiply right singular vectors of L in WORK(IU) by
2710: *                    Q in A, storing result in VT
2711: *                    (Workspace: need M*M)
2712: *
2713:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
2714:      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2715: *
2716:                   ELSE
2717: *
2718: *                    Insufficient workspace for a fast algorithm
2719: *
2720:                      ITAU = 1
2721:                      IWORK = ITAU + M
2722: *
2723: *                    Compute A=L*Q, copying result to VT
2724: *                    (Workspace: need 2*M, prefer M+M*NB)
2725: *
2726:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2727:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2728:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2729: *
2730: *                    Generate Q in VT
2731: *                    (Workspace: need 2*M, prefer M+M*NB)
2732: *
2733:                      CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2734:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2735: *
2736: *                    Copy L to U, zeroing out above it
2737: *
2738:                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
2739:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
2740:      $                            LDU )
2741:                      IE = ITAU
2742:                      ITAUQ = IE + M
2743:                      ITAUP = ITAUQ + M
2744:                      IWORK = ITAUP + M
2745: *
2746: *                    Bidiagonalize L in U
2747: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2748: *
2749:                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
2750:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2751:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2752: *
2753: *                    Multiply right bidiagonalizing vectors in U by Q
2754: *                    in VT
2755: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2756: *
2757:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
2758:      $                            WORK( ITAUP ), VT, LDVT,
2759:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2760: *
2761: *                    Generate left bidiagonalizing vectors in U
2762: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2763: *
2764:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2765:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2766:                      IWORK = IE + M
2767: *
2768: *                    Perform bidiagonal QR iteration, computing left
2769: *                    singular vectors of A in U and computing right
2770: *                    singular vectors of A in VT
2771: *                    (Workspace: need BDSPAC)
2772: *
2773:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2774:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2775:      $                            INFO )
2776: *
2777:                   END IF
2778: *
2779:                END IF
2780: *
2781:             ELSE IF( WNTVA ) THEN
2782: *
2783:                IF( WNTUN ) THEN
2784: *
2785: *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2786: *                 N right singular vectors to be computed in VT and
2787: *                 no left singular vectors to be computed
2788: *
2789:                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2790: *
2791: *                    Sufficient workspace for a fast algorithm
2792: *
2793:                      IR = 1
2794:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2795: *
2796: *                       WORK(IR) is LDA by M
2797: *
2798:                         LDWRKR = LDA
2799:                      ELSE
2800: *
2801: *                       WORK(IR) is M by M
2802: *
2803:                         LDWRKR = M
2804:                      END IF
2805:                      ITAU = IR + LDWRKR*M
2806:                      IWORK = ITAU + M
2807: *
2808: *                    Compute A=L*Q, copying result to VT
2809: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2810: *
2811:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2812:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2813:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2814: *
2815: *                    Copy L to WORK(IR), zeroing out above it
2816: *
2817:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
2818:      $                            LDWRKR )
2819:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2820:      $                            WORK( IR+LDWRKR ), LDWRKR )
2821: *
2822: *                    Generate Q in VT
2823: *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2824: *
2825:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2826:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2827:                      IE = ITAU
2828:                      ITAUQ = IE + M
2829:                      ITAUP = ITAUQ + M
2830:                      IWORK = ITAUP + M
2831: *
2832: *                    Bidiagonalize L in WORK(IR)
2833: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2834: *
2835:                      CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
2836:      $                            WORK( IE ), WORK( ITAUQ ),
2837:      $                            WORK( ITAUP ), WORK( IWORK ),
2838:      $                            LWORK-IWORK+1, IERR )
2839: *
2840: *                    Generate right bidiagonalizing vectors in WORK(IR)
2841: *                    (Workspace: need M*M+4*M-1,
2842: *                                prefer M*M+3*M+(M-1)*NB)
2843: *
2844:                      CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2845:      $                            WORK( ITAUP ), WORK( IWORK ),
2846:      $                            LWORK-IWORK+1, IERR )
2847:                      IWORK = IE + M
2848: *
2849: *                    Perform bidiagonal QR iteration, computing right
2850: *                    singular vectors of L in WORK(IR)
2851: *                    (Workspace: need M*M+BDSPAC)
2852: *
2853:                      CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
2854:      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2855:      $                            WORK( IWORK ), INFO )
2856: *
2857: *                    Multiply right singular vectors of L in WORK(IR) by
2858: *                    Q in VT, storing result in A
2859: *                    (Workspace: need M*M)
2860: *
2861:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
2862:      $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2863: *
2864: *                    Copy right singular vectors of A from A to VT
2865: *
2866:                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
2867: *
2868:                   ELSE
2869: *
2870: *                    Insufficient workspace for a fast algorithm
2871: *
2872:                      ITAU = 1
2873:                      IWORK = ITAU + M
2874: *
2875: *                    Compute A=L*Q, copying result to VT
2876: *                    (Workspace: need 2*M, prefer M+M*NB)
2877: *
2878:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2879:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2880:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2881: *
2882: *                    Generate Q in VT
2883: *                    (Workspace: need M+N, prefer M+N*NB)
2884: *
2885:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2886:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2887:                      IE = ITAU
2888:                      ITAUQ = IE + M
2889:                      ITAUP = ITAUQ + M
2890:                      IWORK = ITAUP + M
2891: *
2892: *                    Zero out above L in A
2893: *
2894:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
2895:      $                            LDA )
2896: *
2897: *                    Bidiagonalize L in A
2898: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2899: *
2900:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
2901:      $                            WORK( ITAUQ ), WORK( ITAUP ),
2902:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2903: *
2904: *                    Multiply right bidiagonalizing vectors in A by Q
2905: *                    in VT
2906: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2907: *
2908:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
2909:      $                            WORK( ITAUP ), VT, LDVT,
2910:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2911:                      IWORK = IE + M
2912: *
2913: *                    Perform bidiagonal QR iteration, computing right
2914: *                    singular vectors of A in VT
2915: *                    (Workspace: need BDSPAC)
2916: *
2917:                      CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
2918:      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2919:      $                            INFO )
2920: *
2921:                   END IF
2922: *
2923:                ELSE IF( WNTUO ) THEN
2924: *
2925: *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2926: *                 N right singular vectors to be computed in VT and
2927: *                 M left singular vectors to be overwritten on A
2928: *
2929:                   IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2930: *
2931: *                    Sufficient workspace for a fast algorithm
2932: *
2933:                      IU = 1
2934:                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2935: *
2936: *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2937: *
2938:                         LDWRKU = LDA
2939:                         IR = IU + LDWRKU*M
2940:                         LDWRKR = LDA
2941:                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2942: *
2943: *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2944: *
2945:                         LDWRKU = LDA
2946:                         IR = IU + LDWRKU*M
2947:                         LDWRKR = M
2948:                      ELSE
2949: *
2950: *                       WORK(IU) is M by M and WORK(IR) is M by M
2951: *
2952:                         LDWRKU = M
2953:                         IR = IU + LDWRKU*M
2954:                         LDWRKR = M
2955:                      END IF
2956:                      ITAU = IR + LDWRKR*M
2957:                      IWORK = ITAU + M
2958: *
2959: *                    Compute A=L*Q, copying result to VT
2960: *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2961: *
2962:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
2963:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2964:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
2965: *
2966: *                    Generate Q in VT
2967: *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2968: *
2969:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2970:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2971: *
2972: *                    Copy L to WORK(IU), zeroing out above it
2973: *
2974:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
2975:      $                            LDWRKU )
2976:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
2977:      $                            WORK( IU+LDWRKU ), LDWRKU )
2978:                      IE = ITAU
2979:                      ITAUQ = IE + M
2980:                      ITAUP = ITAUQ + M
2981:                      IWORK = ITAUP + M
2982: *
2983: *                    Bidiagonalize L in WORK(IU), copying result to
2984: *                    WORK(IR)
2985: *                    (Workspace: need 2*M*M+4*M,
2986: *                                prefer 2*M*M+3*M+2*M*NB)
2987: *
2988:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
2989:      $                            WORK( IE ), WORK( ITAUQ ),
2990:      $                            WORK( ITAUP ), WORK( IWORK ),
2991:      $                            LWORK-IWORK+1, IERR )
2992:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2993:      $                            WORK( IR ), LDWRKR )
2994: *
2995: *                    Generate right bidiagonalizing vectors in WORK(IU)
2996: *                    (Workspace: need 2*M*M+4*M-1,
2997: *                                prefer 2*M*M+3*M+(M-1)*NB)
2998: *
2999:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3000:      $                            WORK( ITAUP ), WORK( IWORK ),
3001:      $                            LWORK-IWORK+1, IERR )
3002: *
3003: *                    Generate left bidiagonalizing vectors in WORK(IR)
3004: *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3005: *
3006:                      CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3007:      $                            WORK( ITAUQ ), WORK( IWORK ),
3008:      $                            LWORK-IWORK+1, IERR )
3009:                      IWORK = IE + M
3010: *
3011: *                    Perform bidiagonal QR iteration, computing left
3012: *                    singular vectors of L in WORK(IR) and computing
3013: *                    right singular vectors of L in WORK(IU)
3014: *                    (Workspace: need 2*M*M+BDSPAC)
3015: *
3016:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3017:      $                            WORK( IU ), LDWRKU, WORK( IR ),
3018:      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3019: *
3020: *                    Multiply right singular vectors of L in WORK(IU) by
3021: *                    Q in VT, storing result in A
3022: *                    (Workspace: need M*M)
3023: *
3024:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3025:      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3026: *
3027: *                    Copy right singular vectors of A from A to VT
3028: *
3029:                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
3030: *
3031: *                    Copy left singular vectors of A from WORK(IR) to A
3032: *
3033:                      CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3034:      $                            LDA )
3035: *
3036:                   ELSE
3037: *
3038: *                    Insufficient workspace for a fast algorithm
3039: *
3040:                      ITAU = 1
3041:                      IWORK = ITAU + M
3042: *
3043: *                    Compute A=L*Q, copying result to VT
3044: *                    (Workspace: need 2*M, prefer M+M*NB)
3045: *
3046:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3047:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3048:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3049: *
3050: *                    Generate Q in VT
3051: *                    (Workspace: need M+N, prefer M+N*NB)
3052: *
3053:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3054:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3055:                      IE = ITAU
3056:                      ITAUQ = IE + M
3057:                      ITAUP = ITAUQ + M
3058:                      IWORK = ITAUP + M
3059: *
3060: *                    Zero out above L in A
3061: *
3062:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
3063:      $                            LDA )
3064: *
3065: *                    Bidiagonalize L in A
3066: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3067: *
3068:                      CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
3069:      $                            WORK( ITAUQ ), WORK( ITAUP ),
3070:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3071: *
3072: *                    Multiply right bidiagonalizing vectors in A by Q
3073: *                    in VT
3074: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3075: *
3076:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
3077:      $                            WORK( ITAUP ), VT, LDVT,
3078:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3079: *
3080: *                    Generate left bidiagonalizing vectors in A
3081: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3082: *
3083:                      CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3084:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3085:                      IWORK = IE + M
3086: *
3087: *                    Perform bidiagonal QR iteration, computing left
3088: *                    singular vectors of A in A and computing right
3089: *                    singular vectors of A in VT
3090: *                    (Workspace: need BDSPAC)
3091: *
3092:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3093:      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3094:      $                            INFO )
3095: *
3096:                   END IF
3097: *
3098:                ELSE IF( WNTUAS ) THEN
3099: *
3100: *                 Path 9t(N much larger than M, JOBU='S' or 'A',
3101: *                         JOBVT='A')
3102: *                 N right singular vectors to be computed in VT and
3103: *                 M left singular vectors to be computed in U
3104: *
3105:                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3106: *
3107: *                    Sufficient workspace for a fast algorithm
3108: *
3109:                      IU = 1
3110:                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
3111: *
3112: *                       WORK(IU) is LDA by M
3113: *
3114:                         LDWRKU = LDA
3115:                      ELSE
3116: *
3117: *                       WORK(IU) is M by M
3118: *
3119:                         LDWRKU = M
3120:                      END IF
3121:                      ITAU = IU + LDWRKU*M
3122:                      IWORK = ITAU + M
3123: *
3124: *                    Compute A=L*Q, copying result to VT
3125: *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3126: *
3127:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3128:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3129:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3130: *
3131: *                    Generate Q in VT
3132: *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3133: *
3134:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3135:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3136: *
3137: *                    Copy L to WORK(IU), zeroing out above it
3138: *
3139:                      CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
3140:      $                            LDWRKU )
3141:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
3142:      $                            WORK( IU+LDWRKU ), LDWRKU )
3143:                      IE = ITAU
3144:                      ITAUQ = IE + M
3145:                      ITAUP = ITAUQ + M
3146:                      IWORK = ITAUP + M
3147: *
3148: *                    Bidiagonalize L in WORK(IU), copying result to U
3149: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3150: *
3151:                      CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
3152:      $                            WORK( IE ), WORK( ITAUQ ),
3153:      $                            WORK( ITAUP ), WORK( IWORK ),
3154:      $                            LWORK-IWORK+1, IERR )
3155:                      CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3156:      $                            LDU )
3157: *
3158: *                    Generate right bidiagonalizing vectors in WORK(IU)
3159: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3160: *
3161:                      CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3162:      $                            WORK( ITAUP ), WORK( IWORK ),
3163:      $                            LWORK-IWORK+1, IERR )
3164: *
3165: *                    Generate left bidiagonalizing vectors in U
3166: *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3167: *
3168:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3169:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3170:                      IWORK = IE + M
3171: *
3172: *                    Perform bidiagonal QR iteration, computing left
3173: *                    singular vectors of L in U and computing right
3174: *                    singular vectors of L in WORK(IU)
3175: *                    (Workspace: need M*M+BDSPAC)
3176: *
3177:                      CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3178:      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3179:      $                            WORK( IWORK ), INFO )
3180: *
3181: *                    Multiply right singular vectors of L in WORK(IU) by
3182: *                    Q in VT, storing result in A
3183: *                    (Workspace: need M*M)
3184: *
3185:                      CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
3186:      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3187: *
3188: *                    Copy right singular vectors of A from A to VT
3189: *
3190:                      CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
3191: *
3192:                   ELSE
3193: *
3194: *                    Insufficient workspace for a fast algorithm
3195: *
3196:                      ITAU = 1
3197:                      IWORK = ITAU + M
3198: *
3199: *                    Compute A=L*Q, copying result to VT
3200: *                    (Workspace: need 2*M, prefer M+M*NB)
3201: *
3202:                      CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
3203:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3204:                      CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3205: *
3206: *                    Generate Q in VT
3207: *                    (Workspace: need M+N, prefer M+N*NB)
3208: *
3209:                      CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3210:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3211: *
3212: *                    Copy L to U, zeroing out above it
3213: *
3214:                      CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
3215:                      CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
3216:      $                            LDU )
3217:                      IE = ITAU
3218:                      ITAUQ = IE + M
3219:                      ITAUP = ITAUQ + M
3220:                      IWORK = ITAUP + M
3221: *
3222: *                    Bidiagonalize L in U
3223: *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3224: *
3225:                      CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
3226:      $                            WORK( ITAUQ ), WORK( ITAUP ),
3227:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3228: *
3229: *                    Multiply right bidiagonalizing vectors in U by Q
3230: *                    in VT
3231: *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3232: *
3233:                      CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
3234:      $                            WORK( ITAUP ), VT, LDVT,
3235:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3236: *
3237: *                    Generate left bidiagonalizing vectors in U
3238: *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3239: *
3240:                      CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3241:      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3242:                      IWORK = IE + M
3243: *
3244: *                    Perform bidiagonal QR iteration, computing left
3245: *                    singular vectors of A in U and computing right
3246: *                    singular vectors of A in VT
3247: *                    (Workspace: need BDSPAC)
3248: *
3249:                      CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3250:      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3251:      $                            INFO )
3252: *
3253:                   END IF
3254: *
3255:                END IF
3256: *
3257:             END IF
3258: *
3259:          ELSE
3260: *
3261: *           N .LT. MNTHR
3262: *
3263: *           Path 10t(N greater than M, but not much larger)
3264: *           Reduce to bidiagonal form without LQ decomposition
3265: *
3266:             IE = 1
3267:             ITAUQ = IE + M
3268:             ITAUP = ITAUQ + M
3269:             IWORK = ITAUP + M
3270: *
3271: *           Bidiagonalize A
3272: *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3273: *
3274:             CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3275:      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3276:      $                   IERR )
3277:             IF( WNTUAS ) THEN
3278: *
3279: *              If left singular vectors desired in U, copy result to U
3280: *              and generate left bidiagonalizing vectors in U
3281: *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3282: *
3283:                CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
3284:                CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3285:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3286:             END IF
3287:             IF( WNTVAS ) THEN
3288: *
3289: *              If right singular vectors desired in VT, copy result to
3290: *              VT and generate right bidiagonalizing vectors in VT
3291: *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3292: *
3293:                CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
3294:                IF( WNTVA )
3295:      $            NRVT = N
3296:                IF( WNTVS )
3297:      $            NRVT = M
3298:                CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3299:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3300:             END IF
3301:             IF( WNTUO ) THEN
3302: *
3303: *              If left singular vectors desired in A, generate left
3304: *              bidiagonalizing vectors in A
3305: *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3306: *
3307:                CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3308:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3309:             END IF
3310:             IF( WNTVO ) THEN
3311: *
3312: *              If right singular vectors desired in A, generate right
3313: *              bidiagonalizing vectors in A
3314: *              (Workspace: need 4*M, prefer 3*M+M*NB)
3315: *
3316:                CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3317:      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3318:             END IF
3319:             IWORK = IE + M
3320:             IF( WNTUAS .OR. WNTUO )
3321:      $         NRU = M
3322:             IF( WNTUN )
3323:      $         NRU = 0
3324:             IF( WNTVAS .OR. WNTVO )
3325:      $         NCVT = N
3326:             IF( WNTVN )
3327:      $         NCVT = 0
3328:             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3329: *
3330: *              Perform bidiagonal QR iteration, if desired, computing
3331: *              left singular vectors in U and computing right singular
3332: *              vectors in VT
3333: *              (Workspace: need BDSPAC)
3334: *
3335:                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3336:      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3337:             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3338: *
3339: *              Perform bidiagonal QR iteration, if desired, computing
3340: *              left singular vectors in U and computing right singular
3341: *              vectors in A
3342: *              (Workspace: need BDSPAC)
3343: *
3344:                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3345:      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3346:             ELSE
3347: *
3348: *              Perform bidiagonal QR iteration, if desired, computing
3349: *              left singular vectors in A and computing right singular
3350: *              vectors in VT
3351: *              (Workspace: need BDSPAC)
3352: *
3353:                CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3354:      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3355:             END IF
3356: *
3357:          END IF
3358: *
3359:       END IF
3360: *
3361: *     If SBDSQR failed to converge, copy unconverged superdiagonals
3362: *     to WORK( 2:MINMN )
3363: *
3364:       IF( INFO.NE.0 ) THEN
3365:          IF( IE.GT.2 ) THEN
3366:             DO 50 I = 1, MINMN - 1
3367:                WORK( I+1 ) = WORK( I+IE-1 )
3368:    50       CONTINUE
3369:          END IF
3370:          IF( IE.LT.2 ) THEN
3371:             DO 60 I = MINMN - 1, 1, -1
3372:                WORK( I+1 ) = WORK( I+IE-1 )
3373:    60       CONTINUE
3374:          END IF
3375:       END IF
3376: *
3377: *     Undo scaling if necessary
3378: *
3379:       IF( ISCL.EQ.1 ) THEN
3380:          IF( ANRM.GT.BIGNUM )
3381:      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3382:      $                   IERR )
3383:          IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3384:      $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3385:      $                   MINMN, IERR )
3386:          IF( ANRM.LT.SMLNUM )
3387:      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3388:      $                   IERR )
3389:          IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3390:      $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
3391:      $                   MINMN, IERR )
3392:       END IF
3393: *
3394: *     Return optimal workspace in WORK(1)
3395: *
3396:       WORK( 1 ) = MAXWRK
3397: *
3398:       RETURN
3399: *
3400: *     End of SGESVD
3401: *
3402:       END
3403: