LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ sgesvd()

subroutine sgesvd ( character jobu,
character jobvt,
integer m,
integer n,
real, dimension( lda, * ) a,
integer lda,
real, dimension( * ) s,
real, dimension( ldu, * ) u,
integer ldu,
real, dimension( ldvt, * ) vt,
integer ldvt,
real, dimension( * ) work,
integer lwork,
integer info )

SGESVD computes the singular value decomposition (SVD) for GE matrices

Download SGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> SGESVD computes the singular value decomposition (SVD) of a real
!> M-by-N matrix A, optionally computing the left and/or right singular
!> vectors. The SVD is written
!>
!>      A = U * SIGMA * transpose(V)
!>
!> where SIGMA is an M-by-N matrix which is zero except for its
!> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
!> V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
!> are the singular values of A; they are real and non-negative, and
!> are returned in descending order.  The first min(m,n) columns of
!> U and V are the left and right singular vectors of A.
!>
!> Note that the routine returns V**T, not V.
!> 
Parameters
[in]JOBU
!>          JOBU is CHARACTER*1
!>          Specifies options for computing all or part of the matrix U:
!>          = 'A':  all M columns of U are returned in array U:
!>          = 'S':  the first min(m,n) columns of U (the left singular
!>                  vectors) are returned in the array U;
!>          = 'O':  the first min(m,n) columns of U (the left singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no columns of U (no left singular vectors) are
!>                  computed.
!> 
[in]JOBVT
!>          JOBVT is CHARACTER*1
!>          Specifies options for computing all or part of the matrix
!>          V**T:
!>          = 'A':  all N rows of V**T are returned in the array VT;
!>          = 'S':  the first min(m,n) rows of V**T (the right singular
!>                  vectors) are returned in the array VT;
!>          = 'O':  the first min(m,n) rows of V**T (the right singular
!>                  vectors) are overwritten on the array A;
!>          = 'N':  no rows of V**T (no right singular vectors) are
!>                  computed.
!>
!>          JOBVT and JOBU cannot both be 'O'.
!> 
[in]M
!>          M is INTEGER
!>          The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>          The number of columns of the input matrix A.  N >= 0.
!> 
[in,out]A
!>          A is REAL array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!>          On exit,
!>          if JOBU = 'O',  A is overwritten with the first min(m,n)
!>                          columns of U (the left singular vectors,
!>                          stored columnwise);
!>          if JOBVT = 'O', A is overwritten with the first min(m,n)
!>                          rows of V**T (the right singular vectors,
!>                          stored rowwise);
!>          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
!>                          are destroyed.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]S
!>          S is REAL array, dimension (min(M,N))
!>          The singular values of A, sorted so that S(i) >= S(i+1).
!> 
[out]U
!>          U is REAL array, dimension (LDU,UCOL)
!>          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
!>          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
!>          if JOBU = 'S', U contains the first min(m,n) columns of U
!>          (the left singular vectors, stored columnwise);
!>          if JOBU = 'N' or 'O', U is not referenced.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U.  LDU >= 1; if
!>          JOBU = 'S' or 'A', LDU >= M.
!> 
[out]VT
!>          VT is REAL array, dimension (LDVT,N)
!>          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
!>          V**T;
!>          if JOBVT = 'S', VT contains the first min(m,n) rows of
!>          V**T (the right singular vectors, stored rowwise);
!>          if JOBVT = 'N' or 'O', VT is not referenced.
!> 
[in]LDVT
!>          LDVT is INTEGER
!>          The leading dimension of the array VT.  LDVT >= 1; if
!>          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
!> 
[out]WORK
!>          WORK is REAL array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
!>          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
!>          superdiagonal elements of an upper bidiagonal matrix B
!>          whose diagonal is in S (not necessarily sorted). B
!>          satisfies A = U * B * VT, so it has the same singular values
!>          as A, and singular vectors related by U and VT.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK.
!>          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
!>             - PATH 1  (M much larger than N, JOBU='N')
!>             - PATH 1t (N much larger than M, JOBVT='N')
!>          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
!>          For good performance, LWORK should generally be larger.
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit.
!>          < 0:  if INFO = -i, the i-th argument had an illegal value.
!>          > 0:  if SBDSQR did not converge, INFO specifies how many
!>                superdiagonals of an intermediate bidiagonal form B
!>                did not converge to zero. See the description of WORK
!>                above for details.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 207 of file sgesvd.f.

210*
211* -- LAPACK driver routine --
212* -- LAPACK is a software package provided by Univ. of Tennessee, --
213* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
214*
215* .. Scalar Arguments ..
216 CHARACTER JOBU, JOBVT
217 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
218* ..
219* .. Array Arguments ..
220 REAL A( LDA, * ), S( * ), U( LDU, * ),
221 $ VT( LDVT, * ), WORK( * )
222* ..
223*
224* =====================================================================
225*
226* .. Parameters ..
227 REAL ZERO, ONE
228 parameter( zero = 0.0e0, one = 1.0e0 )
229* ..
230* .. Local Scalars ..
231 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
232 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
233 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
234 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
235 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
236 $ NRVT, WRKBL
237 INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
238 $ LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q,
239 $ LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M
240 REAL ANRM, BIGNUM, EPS, SMLNUM
241* ..
242* .. Local Arrays ..
243 REAL DUM( 1 )
244* ..
245* .. External Subroutines ..
246 EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf,
247 $ slacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 REAL SLAMCH, SLANGE, SROUNDUP_LWORK
255 EXTERNAL lsame, ilaenv, slamch, slange,
257* ..
258* .. Intrinsic Functions ..
259 INTRINSIC max, min, sqrt
260* ..
261* .. Executable Statements ..
262*
263* Test the input arguments
264*
265 info = 0
266 minmn = min( m, n )
267 wntua = lsame( jobu, 'A' )
268 wntus = lsame( jobu, 'S' )
269 wntuas = wntua .OR. wntus
270 wntuo = lsame( jobu, 'O' )
271 wntun = lsame( jobu, 'N' )
272 wntva = lsame( jobvt, 'A' )
273 wntvs = lsame( jobvt, 'S' )
274 wntvas = wntva .OR. wntvs
275 wntvo = lsame( jobvt, 'O' )
276 wntvn = lsame( jobvt, 'N' )
277 lquery = ( lwork.EQ.-1 )
278*
279 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
280 info = -1
281 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
282 $ ( wntvo .AND. wntuo ) ) THEN
283 info = -2
284 ELSE IF( m.LT.0 ) THEN
285 info = -3
286 ELSE IF( n.LT.0 ) THEN
287 info = -4
288 ELSE IF( lda.LT.max( 1, m ) ) THEN
289 info = -6
290 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
291 info = -9
292 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
293 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
294 info = -11
295 END IF
296*
297* Compute workspace
298* (Note: Comments in the code beginning "Workspace:" describe the
299* minimal amount of workspace needed at that point in the code,
300* as well as the preferred amount for good performance.
301* NB refers to the optimal block size for the immediately
302* following subroutine, as returned by ILAENV.)
303*
304 IF( info.EQ.0 ) THEN
305 minwrk = 1
306 maxwrk = 1
307 IF( m.GE.n .AND. minmn.GT.0 ) THEN
308*
309* Compute space needed for SBDSQR
310*
311 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
312 bdspac = 5*n
313* Compute space needed for SGEQRF
314 CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
315 lwork_sgeqrf = int( dum(1) )
316* Compute space needed for SORGQR
317 CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
318 lwork_sorgqr_n = int( dum(1) )
319 CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
320 lwork_sorgqr_m = int( dum(1) )
321* Compute space needed for SGEBRD
322 CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
323 $ dum(1), dum(1), -1, ierr )
324 lwork_sgebrd = int( dum(1) )
325* Compute space needed for SORGBR P
326 CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
327 $ dum(1), -1, ierr )
328 lwork_sorgbr_p = int( dum(1) )
329* Compute space needed for SORGBR Q
330 CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
331 $ dum(1), -1, ierr )
332 lwork_sorgbr_q = int( dum(1) )
333*
334 IF( m.GE.mnthr ) THEN
335 IF( wntun ) THEN
336*
337* Path 1 (M much larger than N, JOBU='N')
338*
339 maxwrk = n + lwork_sgeqrf
340 maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
341 IF( wntvo .OR. wntvas )
342 $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
343 maxwrk = max( maxwrk, bdspac )
344 minwrk = max( 4*n, bdspac )
345 ELSE IF( wntuo .AND. wntvn ) THEN
346*
347* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
348*
349 wrkbl = n + lwork_sgeqrf
350 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
351 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
352 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
353 wrkbl = max( wrkbl, bdspac )
354 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
355 minwrk = max( 3*n+m, bdspac )
356 ELSE IF( wntuo .AND. wntvas ) THEN
357*
358* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
359* 'A')
360*
361 wrkbl = n + lwork_sgeqrf
362 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
363 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
364 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
365 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
366 wrkbl = max( wrkbl, bdspac )
367 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
368 minwrk = max( 3*n+m, bdspac )
369 ELSE IF( wntus .AND. wntvn ) THEN
370*
371* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
372*
373 wrkbl = n + lwork_sgeqrf
374 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
375 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
376 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
377 wrkbl = max( wrkbl, bdspac )
378 maxwrk = n*n + wrkbl
379 minwrk = max( 3*n+m, bdspac )
380 ELSE IF( wntus .AND. wntvo ) THEN
381*
382* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
383*
384 wrkbl = n + lwork_sgeqrf
385 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
386 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
387 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
388 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
389 wrkbl = max( wrkbl, bdspac )
390 maxwrk = 2*n*n + wrkbl
391 minwrk = max( 3*n+m, bdspac )
392 ELSE IF( wntus .AND. wntvas ) THEN
393*
394* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
395* 'A')
396*
397 wrkbl = n + lwork_sgeqrf
398 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
399 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
400 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
401 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
402 wrkbl = max( wrkbl, bdspac )
403 maxwrk = n*n + wrkbl
404 minwrk = max( 3*n+m, bdspac )
405 ELSE IF( wntua .AND. wntvn ) THEN
406*
407* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
408*
409 wrkbl = n + lwork_sgeqrf
410 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
411 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
412 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
413 wrkbl = max( wrkbl, bdspac )
414 maxwrk = n*n + wrkbl
415 minwrk = max( 3*n+m, bdspac )
416 ELSE IF( wntua .AND. wntvo ) THEN
417*
418* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
419*
420 wrkbl = n + lwork_sgeqrf
421 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
422 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
423 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
424 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
425 wrkbl = max( wrkbl, bdspac )
426 maxwrk = 2*n*n + wrkbl
427 minwrk = max( 3*n+m, bdspac )
428 ELSE IF( wntua .AND. wntvas ) THEN
429*
430* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
431* 'A')
432*
433 wrkbl = n + lwork_sgeqrf
434 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
435 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
436 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
437 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
438 wrkbl = max( wrkbl, bdspac )
439 maxwrk = n*n + wrkbl
440 minwrk = max( 3*n+m, bdspac )
441 END IF
442 ELSE
443*
444* Path 10 (M at least N, but not much larger)
445*
446 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
447 $ dum(1), dum(1), -1, ierr )
448 lwork_sgebrd = int( dum(1) )
449 maxwrk = 3*n + lwork_sgebrd
450 IF( wntus .OR. wntuo ) THEN
451 CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
452 $ dum(1), -1, ierr )
453 lwork_sorgbr_q = int( dum(1) )
454 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
455 END IF
456 IF( wntua ) THEN
457 CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
458 $ dum(1), -1, ierr )
459 lwork_sorgbr_q = int( dum(1) )
460 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
461 END IF
462 IF( .NOT.wntvn ) THEN
463 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
464 END IF
465 maxwrk = max( maxwrk, bdspac )
466 minwrk = max( 3*n+m, bdspac )
467 END IF
468 ELSE IF( minmn.GT.0 ) THEN
469*
470* Compute space needed for SBDSQR
471*
472 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
473 bdspac = 5*m
474* Compute space needed for SGELQF
475 CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
476 lwork_sgelqf = int( dum(1) )
477* Compute space needed for SORGLQ
478 CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1,
479 $ ierr )
480 lwork_sorglq_n = int( dum(1) )
481 CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
482 lwork_sorglq_m = int( dum(1) )
483* Compute space needed for SGEBRD
484 CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
485 $ dum(1), dum(1), -1, ierr )
486 lwork_sgebrd = int( dum(1) )
487* Compute space needed for SORGBR P
488 CALL sorgbr( 'P', m, m, m, a, n, dum(1),
489 $ dum(1), -1, ierr )
490 lwork_sorgbr_p = int( dum(1) )
491* Compute space needed for SORGBR Q
492 CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
493 $ dum(1), -1, ierr )
494 lwork_sorgbr_q = int( dum(1) )
495 IF( n.GE.mnthr ) THEN
496 IF( wntvn ) THEN
497*
498* Path 1t(N much larger than M, JOBVT='N')
499*
500 maxwrk = m + lwork_sgelqf
501 maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
502 IF( wntuo .OR. wntuas )
503 $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
504 maxwrk = max( maxwrk, bdspac )
505 minwrk = max( 4*m, bdspac )
506 ELSE IF( wntvo .AND. wntun ) THEN
507*
508* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
509*
510 wrkbl = m + lwork_sgelqf
511 wrkbl = max( wrkbl, m+lwork_sorglq_m )
512 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
513 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
514 wrkbl = max( wrkbl, bdspac )
515 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
516 minwrk = max( 3*m+n, bdspac )
517 ELSE IF( wntvo .AND. wntuas ) THEN
518*
519* Path 3t(N much larger than M, JOBU='S' or 'A',
520* JOBVT='O')
521*
522 wrkbl = m + lwork_sgelqf
523 wrkbl = max( wrkbl, m+lwork_sorglq_m )
524 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
525 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
526 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
527 wrkbl = max( wrkbl, bdspac )
528 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
529 minwrk = max( 3*m+n, bdspac )
530 ELSE IF( wntvs .AND. wntun ) THEN
531*
532* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
533*
534 wrkbl = m + lwork_sgelqf
535 wrkbl = max( wrkbl, m+lwork_sorglq_m )
536 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
537 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
538 wrkbl = max( wrkbl, bdspac )
539 maxwrk = m*m + wrkbl
540 minwrk = max( 3*m+n, bdspac )
541 ELSE IF( wntvs .AND. wntuo ) THEN
542*
543* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
544*
545 wrkbl = m + lwork_sgelqf
546 wrkbl = max( wrkbl, m+lwork_sorglq_m )
547 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
548 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
549 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
550 wrkbl = max( wrkbl, bdspac )
551 maxwrk = 2*m*m + wrkbl
552 minwrk = max( 3*m+n, bdspac )
553 maxwrk = max( maxwrk, minwrk )
554 ELSE IF( wntvs .AND. wntuas ) THEN
555*
556* Path 6t(N much larger than M, JOBU='S' or 'A',
557* JOBVT='S')
558*
559 wrkbl = m + lwork_sgelqf
560 wrkbl = max( wrkbl, m+lwork_sorglq_m )
561 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
562 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
563 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
564 wrkbl = max( wrkbl, bdspac )
565 maxwrk = m*m + wrkbl
566 minwrk = max( 3*m+n, bdspac )
567 ELSE IF( wntva .AND. wntun ) THEN
568*
569* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
570*
571 wrkbl = m + lwork_sgelqf
572 wrkbl = max( wrkbl, m+lwork_sorglq_n )
573 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
574 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
575 wrkbl = max( wrkbl, bdspac )
576 maxwrk = m*m + wrkbl
577 minwrk = max( 3*m+n, bdspac )
578 ELSE IF( wntva .AND. wntuo ) THEN
579*
580* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
581*
582 wrkbl = m + lwork_sgelqf
583 wrkbl = max( wrkbl, m+lwork_sorglq_n )
584 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
585 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
586 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
587 wrkbl = max( wrkbl, bdspac )
588 maxwrk = 2*m*m + wrkbl
589 minwrk = max( 3*m+n, bdspac )
590 ELSE IF( wntva .AND. wntuas ) THEN
591*
592* Path 9t(N much larger than M, JOBU='S' or 'A',
593* JOBVT='A')
594*
595 wrkbl = m + lwork_sgelqf
596 wrkbl = max( wrkbl, m+lwork_sorglq_n )
597 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
598 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
599 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
600 wrkbl = max( wrkbl, bdspac )
601 maxwrk = m*m + wrkbl
602 minwrk = max( 3*m+n, bdspac )
603 END IF
604 ELSE
605*
606* Path 10t(N greater than M, but not much larger)
607*
608 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
609 $ dum(1), dum(1), -1, ierr )
610 lwork_sgebrd = int( dum(1) )
611 maxwrk = 3*m + lwork_sgebrd
612 IF( wntvs .OR. wntvo ) THEN
613* Compute space needed for SORGBR P
614 CALL sorgbr( 'P', m, n, m, a, n, dum(1),
615 $ dum(1), -1, ierr )
616 lwork_sorgbr_p = int( dum(1) )
617 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
618 END IF
619 IF( wntva ) THEN
620 CALL sorgbr( 'P', n, n, m, a, n, dum(1),
621 $ dum(1), -1, ierr )
622 lwork_sorgbr_p = int( dum(1) )
623 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
624 END IF
625 IF( .NOT.wntun ) THEN
626 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
627 END IF
628 maxwrk = max( maxwrk, bdspac )
629 minwrk = max( 3*m+n, bdspac )
630 END IF
631 END IF
632 maxwrk = max( maxwrk, minwrk )
633 work( 1 ) = sroundup_lwork(maxwrk)
634*
635 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
636 info = -13
637 END IF
638 END IF
639*
640 IF( info.NE.0 ) THEN
641 CALL xerbla( 'SGESVD', -info )
642 RETURN
643 ELSE IF( lquery ) THEN
644 RETURN
645 END IF
646*
647* Quick return if possible
648*
649 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
650 RETURN
651 END IF
652*
653* Get machine constants
654*
655 eps = slamch( 'P' )
656 smlnum = sqrt( slamch( 'S' ) ) / eps
657 bignum = one / smlnum
658*
659* Scale A if max element outside range [SMLNUM,BIGNUM]
660*
661 anrm = slange( 'M', m, n, a, lda, dum )
662 iscl = 0
663 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
664 iscl = 1
665 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
666 ELSE IF( anrm.GT.bignum ) THEN
667 iscl = 1
668 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
669 END IF
670*
671 IF( m.GE.n ) THEN
672*
673* A has at least as many rows as columns. If A has sufficiently
674* more rows than columns, first reduce using the QR
675* decomposition (if sufficient workspace available)
676*
677 IF( m.GE.mnthr ) THEN
678*
679 IF( wntun ) THEN
680*
681* Path 1 (M much larger than N, JOBU='N')
682* No left singular vectors to be computed
683*
684 itau = 1
685 iwork = itau + n
686*
687* Compute A=Q*R
688* (Workspace: need 2*N, prefer N+N*NB)
689*
690 CALL sgeqrf( m, n, a, lda, work( itau ),
691 $ work( iwork ),
692 $ lwork-iwork+1, ierr )
693*
694* Zero out below R
695*
696 IF( n .GT. 1 ) THEN
697 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
698 $ lda )
699 END IF
700 ie = 1
701 itauq = ie + n
702 itaup = itauq + n
703 iwork = itaup + n
704*
705* Bidiagonalize R in A
706* (Workspace: need 4*N, prefer 3*N+2*N*NB)
707*
708 CALL sgebrd( n, n, a, lda, s, work( ie ),
709 $ work( itauq ),
710 $ work( itaup ), work( iwork ), lwork-iwork+1,
711 $ ierr )
712 ncvt = 0
713 IF( wntvo .OR. wntvas ) THEN
714*
715* If right singular vectors desired, generate P'.
716* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
717*
718 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
719 $ work( iwork ), lwork-iwork+1, ierr )
720 ncvt = n
721 END IF
722 iwork = ie + n
723*
724* Perform bidiagonal QR iteration, computing right
725* singular vectors of A in A if desired
726* (Workspace: need BDSPAC)
727*
728 CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a,
729 $ lda,
730 $ dum, 1, dum, 1, work( iwork ), info )
731*
732* If right singular vectors desired in VT, copy them there
733*
734 IF( wntvas )
735 $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
736*
737 ELSE IF( wntuo .AND. wntvn ) THEN
738*
739* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
740* N left singular vectors to be overwritten on A and
741* no right singular vectors to be computed
742*
743 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
744*
745* Sufficient workspace for a fast algorithm
746*
747 ir = 1
748 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
749*
750* WORK(IU) is LDA by N, WORK(IR) is LDA by N
751*
752 ldwrku = lda
753 ldwrkr = lda
754 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
755*
756* WORK(IU) is LDA by N, WORK(IR) is N by N
757*
758 ldwrku = lda
759 ldwrkr = n
760 ELSE
761*
762* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
763*
764 ldwrku = ( lwork-n*n-n ) / n
765 ldwrkr = n
766 END IF
767 itau = ir + ldwrkr*n
768 iwork = itau + n
769*
770* Compute A=Q*R
771* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
772*
773 CALL sgeqrf( m, n, a, lda, work( itau ),
774 $ work( iwork ), lwork-iwork+1, ierr )
775*
776* Copy R to WORK(IR) and zero out below it
777*
778 CALL slacpy( 'U', n, n, a, lda, work( ir ),
779 $ ldwrkr )
780 CALL slaset( 'L', n-1, n-1, zero, zero,
781 $ work( ir+1 ),
782 $ ldwrkr )
783*
784* Generate Q in A
785* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
786*
787 CALL sorgqr( m, n, n, a, lda, work( itau ),
788 $ work( iwork ), lwork-iwork+1, ierr )
789 ie = itau
790 itauq = ie + n
791 itaup = itauq + n
792 iwork = itaup + n
793*
794* Bidiagonalize R in WORK(IR)
795* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
796*
797 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
798 $ work( ie ),
799 $ work( itauq ), work( itaup ),
800 $ work( iwork ), lwork-iwork+1, ierr )
801*
802* Generate left vectors bidiagonalizing R
803* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
804*
805 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
806 $ work( itauq ), work( iwork ),
807 $ lwork-iwork+1, ierr )
808 iwork = ie + n
809*
810* Perform bidiagonal QR iteration, computing left
811* singular vectors of R in WORK(IR)
812* (Workspace: need N*N+BDSPAC)
813*
814 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
815 $ 1,
816 $ work( ir ), ldwrkr, dum, 1,
817 $ work( iwork ), info )
818 iu = ie + n
819*
820* Multiply Q in A by left singular vectors of R in
821* WORK(IR), storing result in WORK(IU) and copying to A
822* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
823*
824 DO 10 i = 1, m, ldwrku
825 chunk = min( m-i+1, ldwrku )
826 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i,
827 $ 1 ),
828 $ lda, work( ir ), ldwrkr, zero,
829 $ work( iu ), ldwrku )
830 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
831 $ a( i, 1 ), lda )
832 10 CONTINUE
833*
834 ELSE
835*
836* Insufficient workspace for a fast algorithm
837*
838 ie = 1
839 itauq = ie + n
840 itaup = itauq + n
841 iwork = itaup + n
842*
843* Bidiagonalize A
844* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
845*
846 CALL sgebrd( m, n, a, lda, s, work( ie ),
847 $ work( itauq ), work( itaup ),
848 $ work( iwork ), lwork-iwork+1, ierr )
849*
850* Generate left vectors bidiagonalizing A
851* (Workspace: need 4*N, prefer 3*N+N*NB)
852*
853 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
854 $ work( iwork ), lwork-iwork+1, ierr )
855 iwork = ie + n
856*
857* Perform bidiagonal QR iteration, computing left
858* singular vectors of A in A
859* (Workspace: need BDSPAC)
860*
861 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
862 $ 1,
863 $ a, lda, dum, 1, work( iwork ), info )
864*
865 END IF
866*
867 ELSE IF( wntuo .AND. wntvas ) THEN
868*
869* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
870* N left singular vectors to be overwritten on A and
871* N right singular vectors to be computed in VT
872*
873 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
874*
875* Sufficient workspace for a fast algorithm
876*
877 ir = 1
878 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
879*
880* WORK(IU) is LDA by N and WORK(IR) is LDA by N
881*
882 ldwrku = lda
883 ldwrkr = lda
884 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
885*
886* WORK(IU) is LDA by N and WORK(IR) is N by N
887*
888 ldwrku = lda
889 ldwrkr = n
890 ELSE
891*
892* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
893*
894 ldwrku = ( lwork-n*n-n ) / n
895 ldwrkr = n
896 END IF
897 itau = ir + ldwrkr*n
898 iwork = itau + n
899*
900* Compute A=Q*R
901* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
902*
903 CALL sgeqrf( m, n, a, lda, work( itau ),
904 $ work( iwork ), lwork-iwork+1, ierr )
905*
906* Copy R to VT, zeroing out below it
907*
908 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
909 IF( n.GT.1 )
910 $ CALL slaset( 'L', n-1, n-1, zero, zero,
911 $ vt( 2, 1 ), ldvt )
912*
913* Generate Q in A
914* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
915*
916 CALL sorgqr( m, n, n, a, lda, work( itau ),
917 $ work( iwork ), lwork-iwork+1, ierr )
918 ie = itau
919 itauq = ie + n
920 itaup = itauq + n
921 iwork = itaup + n
922*
923* Bidiagonalize R in VT, copying result to WORK(IR)
924* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
925*
926 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
927 $ work( itauq ), work( itaup ),
928 $ work( iwork ), lwork-iwork+1, ierr )
929 CALL slacpy( 'L', n, n, vt, ldvt, work( ir ),
930 $ ldwrkr )
931*
932* Generate left vectors bidiagonalizing R in WORK(IR)
933* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
934*
935 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
936 $ work( itauq ), work( iwork ),
937 $ lwork-iwork+1, ierr )
938*
939* Generate right vectors bidiagonalizing R in VT
940* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
941*
942 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
943 $ work( iwork ), lwork-iwork+1, ierr )
944 iwork = ie + n
945*
946* Perform bidiagonal QR iteration, computing left
947* singular vectors of R in WORK(IR) and computing right
948* singular vectors of R in VT
949* (Workspace: need N*N+BDSPAC)
950*
951 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
952 $ ldvt,
953 $ work( ir ), ldwrkr, dum, 1,
954 $ work( iwork ), info )
955 iu = ie + n
956*
957* Multiply Q in A by left singular vectors of R in
958* WORK(IR), storing result in WORK(IU) and copying to A
959* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
960*
961 DO 20 i = 1, m, ldwrku
962 chunk = min( m-i+1, ldwrku )
963 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i,
964 $ 1 ),
965 $ lda, work( ir ), ldwrkr, zero,
966 $ work( iu ), ldwrku )
967 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
968 $ a( i, 1 ), lda )
969 20 CONTINUE
970*
971 ELSE
972*
973* Insufficient workspace for a fast algorithm
974*
975 itau = 1
976 iwork = itau + n
977*
978* Compute A=Q*R
979* (Workspace: need 2*N, prefer N+N*NB)
980*
981 CALL sgeqrf( m, n, a, lda, work( itau ),
982 $ work( iwork ), lwork-iwork+1, ierr )
983*
984* Copy R to VT, zeroing out below it
985*
986 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
987 IF( n.GT.1 )
988 $ CALL slaset( 'L', n-1, n-1, zero, zero,
989 $ vt( 2, 1 ), ldvt )
990*
991* Generate Q in A
992* (Workspace: need 2*N, prefer N+N*NB)
993*
994 CALL sorgqr( m, n, n, a, lda, work( itau ),
995 $ work( iwork ), lwork-iwork+1, ierr )
996 ie = itau
997 itauq = ie + n
998 itaup = itauq + n
999 iwork = itaup + n
1000*
1001* Bidiagonalize R in VT
1002* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1003*
1004 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1005 $ work( itauq ), work( itaup ),
1006 $ work( iwork ), lwork-iwork+1, ierr )
1007*
1008* Multiply Q in A by left vectors bidiagonalizing R
1009* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1010*
1011 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1012 $ work( itauq ), a, lda, work( iwork ),
1013 $ lwork-iwork+1, ierr )
1014*
1015* Generate right vectors bidiagonalizing R in VT
1016* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1017*
1018 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1019 $ work( iwork ), lwork-iwork+1, ierr )
1020 iwork = ie + n
1021*
1022* Perform bidiagonal QR iteration, computing left
1023* singular vectors of A in A and computing right
1024* singular vectors of A in VT
1025* (Workspace: need BDSPAC)
1026*
1027 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1028 $ ldvt,
1029 $ a, lda, dum, 1, work( iwork ), info )
1030*
1031 END IF
1032*
1033 ELSE IF( wntus ) THEN
1034*
1035 IF( wntvn ) THEN
1036*
1037* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1038* N left singular vectors to be computed in U and
1039* no right singular vectors to be computed
1040*
1041 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1042*
1043* Sufficient workspace for a fast algorithm
1044*
1045 ir = 1
1046 IF( lwork.GE.wrkbl+lda*n ) THEN
1047*
1048* WORK(IR) is LDA by N
1049*
1050 ldwrkr = lda
1051 ELSE
1052*
1053* WORK(IR) is N by N
1054*
1055 ldwrkr = n
1056 END IF
1057 itau = ir + ldwrkr*n
1058 iwork = itau + n
1059*
1060* Compute A=Q*R
1061* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1062*
1063 CALL sgeqrf( m, n, a, lda, work( itau ),
1064 $ work( iwork ), lwork-iwork+1, ierr )
1065*
1066* Copy R to WORK(IR), zeroing out below it
1067*
1068 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1069 $ ldwrkr )
1070 CALL slaset( 'L', n-1, n-1, zero, zero,
1071 $ work( ir+1 ), ldwrkr )
1072*
1073* Generate Q in A
1074* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1075*
1076 CALL sorgqr( m, n, n, a, lda, work( itau ),
1077 $ work( iwork ), lwork-iwork+1, ierr )
1078 ie = itau
1079 itauq = ie + n
1080 itaup = itauq + n
1081 iwork = itaup + n
1082*
1083* Bidiagonalize R in WORK(IR)
1084* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1085*
1086 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1087 $ work( ie ), work( itauq ),
1088 $ work( itaup ), work( iwork ),
1089 $ lwork-iwork+1, ierr )
1090*
1091* Generate left vectors bidiagonalizing R in WORK(IR)
1092* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1093*
1094 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1095 $ work( itauq ), work( iwork ),
1096 $ lwork-iwork+1, ierr )
1097 iwork = ie + n
1098*
1099* Perform bidiagonal QR iteration, computing left
1100* singular vectors of R in WORK(IR)
1101* (Workspace: need N*N+BDSPAC)
1102*
1103 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ),
1104 $ dum,
1105 $ 1, work( ir ), ldwrkr, dum, 1,
1106 $ work( iwork ), info )
1107*
1108* Multiply Q in A by left singular vectors of R in
1109* WORK(IR), storing result in U
1110* (Workspace: need N*N)
1111*
1112 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1113 $ work( ir ), ldwrkr, zero, u, ldu )
1114*
1115 ELSE
1116*
1117* Insufficient workspace for a fast algorithm
1118*
1119 itau = 1
1120 iwork = itau + n
1121*
1122* Compute A=Q*R, copying result to U
1123* (Workspace: need 2*N, prefer N+N*NB)
1124*
1125 CALL sgeqrf( m, n, a, lda, work( itau ),
1126 $ work( iwork ), lwork-iwork+1, ierr )
1127 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1128*
1129* Generate Q in U
1130* (Workspace: need 2*N, prefer N+N*NB)
1131*
1132 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1133 $ work( iwork ), lwork-iwork+1, ierr )
1134 ie = itau
1135 itauq = ie + n
1136 itaup = itauq + n
1137 iwork = itaup + n
1138*
1139* Zero out below R in A
1140*
1141 IF( n .GT. 1 ) THEN
1142 CALL slaset( 'L', n-1, n-1, zero, zero,
1143 $ a( 2, 1 ), lda )
1144 END IF
1145*
1146* Bidiagonalize R in A
1147* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1148*
1149 CALL sgebrd( n, n, a, lda, s, work( ie ),
1150 $ work( itauq ), work( itaup ),
1151 $ work( iwork ), lwork-iwork+1, ierr )
1152*
1153* Multiply Q in U by left vectors bidiagonalizing R
1154* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1155*
1156 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1157 $ work( itauq ), u, ldu, work( iwork ),
1158 $ lwork-iwork+1, ierr )
1159 iwork = ie + n
1160*
1161* Perform bidiagonal QR iteration, computing left
1162* singular vectors of A in U
1163* (Workspace: need BDSPAC)
1164*
1165 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ),
1166 $ dum,
1167 $ 1, u, ldu, dum, 1, work( iwork ),
1168 $ info )
1169*
1170 END IF
1171*
1172 ELSE IF( wntvo ) THEN
1173*
1174* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1175* N left singular vectors to be computed in U and
1176* N right singular vectors to be overwritten on A
1177*
1178 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1179*
1180* Sufficient workspace for a fast algorithm
1181*
1182 iu = 1
1183 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1184*
1185* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1186*
1187 ldwrku = lda
1188 ir = iu + ldwrku*n
1189 ldwrkr = lda
1190 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1191*
1192* WORK(IU) is LDA by N and WORK(IR) is N by N
1193*
1194 ldwrku = lda
1195 ir = iu + ldwrku*n
1196 ldwrkr = n
1197 ELSE
1198*
1199* WORK(IU) is N by N and WORK(IR) is N by N
1200*
1201 ldwrku = n
1202 ir = iu + ldwrku*n
1203 ldwrkr = n
1204 END IF
1205 itau = ir + ldwrkr*n
1206 iwork = itau + n
1207*
1208* Compute A=Q*R
1209* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1210*
1211 CALL sgeqrf( m, n, a, lda, work( itau ),
1212 $ work( iwork ), lwork-iwork+1, ierr )
1213*
1214* Copy R to WORK(IU), zeroing out below it
1215*
1216 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1217 $ ldwrku )
1218 CALL slaset( 'L', n-1, n-1, zero, zero,
1219 $ work( iu+1 ), ldwrku )
1220*
1221* Generate Q in A
1222* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1223*
1224 CALL sorgqr( m, n, n, a, lda, work( itau ),
1225 $ work( iwork ), lwork-iwork+1, ierr )
1226 ie = itau
1227 itauq = ie + n
1228 itaup = itauq + n
1229 iwork = itaup + n
1230*
1231* Bidiagonalize R in WORK(IU), copying result to
1232* WORK(IR)
1233* (Workspace: need 2*N*N+4*N,
1234* prefer 2*N*N+3*N+2*N*NB)
1235*
1236 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1237 $ work( ie ), work( itauq ),
1238 $ work( itaup ), work( iwork ),
1239 $ lwork-iwork+1, ierr )
1240 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1241 $ work( ir ), ldwrkr )
1242*
1243* Generate left bidiagonalizing vectors in WORK(IU)
1244* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1245*
1246 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1247 $ work( itauq ), work( iwork ),
1248 $ lwork-iwork+1, ierr )
1249*
1250* Generate right bidiagonalizing vectors in WORK(IR)
1251* (Workspace: need 2*N*N+4*N-1,
1252* prefer 2*N*N+3*N+(N-1)*NB)
1253*
1254 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1255 $ work( itaup ), work( iwork ),
1256 $ lwork-iwork+1, ierr )
1257 iwork = ie + n
1258*
1259* Perform bidiagonal QR iteration, computing left
1260* singular vectors of R in WORK(IU) and computing
1261* right singular vectors of R in WORK(IR)
1262* (Workspace: need 2*N*N+BDSPAC)
1263*
1264 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1265 $ work( ir ), ldwrkr, work( iu ),
1266 $ ldwrku, dum, 1, work( iwork ), info )
1267*
1268* Multiply Q in A by left singular vectors of R in
1269* WORK(IU), storing result in U
1270* (Workspace: need N*N)
1271*
1272 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1273 $ work( iu ), ldwrku, zero, u, ldu )
1274*
1275* Copy right singular vectors of R to A
1276* (Workspace: need N*N)
1277*
1278 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1279 $ lda )
1280*
1281 ELSE
1282*
1283* Insufficient workspace for a fast algorithm
1284*
1285 itau = 1
1286 iwork = itau + n
1287*
1288* Compute A=Q*R, copying result to U
1289* (Workspace: need 2*N, prefer N+N*NB)
1290*
1291 CALL sgeqrf( m, n, a, lda, work( itau ),
1292 $ work( iwork ), lwork-iwork+1, ierr )
1293 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1294*
1295* Generate Q in U
1296* (Workspace: need 2*N, prefer N+N*NB)
1297*
1298 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1299 $ work( iwork ), lwork-iwork+1, ierr )
1300 ie = itau
1301 itauq = ie + n
1302 itaup = itauq + n
1303 iwork = itaup + n
1304*
1305* Zero out below R in A
1306*
1307 IF( n .GT. 1 ) THEN
1308 CALL slaset( 'L', n-1, n-1, zero, zero,
1309 $ a( 2, 1 ), lda )
1310 END IF
1311*
1312* Bidiagonalize R in A
1313* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1314*
1315 CALL sgebrd( n, n, a, lda, s, work( ie ),
1316 $ work( itauq ), work( itaup ),
1317 $ work( iwork ), lwork-iwork+1, ierr )
1318*
1319* Multiply Q in U by left vectors bidiagonalizing R
1320* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1321*
1322 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1323 $ work( itauq ), u, ldu, work( iwork ),
1324 $ lwork-iwork+1, ierr )
1325*
1326* Generate right vectors bidiagonalizing R in A
1327* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1328*
1329 CALL sorgbr( 'P', n, n, n, a, lda,
1330 $ work( itaup ),
1331 $ work( iwork ), lwork-iwork+1, ierr )
1332 iwork = ie + n
1333*
1334* Perform bidiagonal QR iteration, computing left
1335* singular vectors of A in U and computing right
1336* singular vectors of A in A
1337* (Workspace: need BDSPAC)
1338*
1339 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1340 $ lda, u, ldu, dum, 1, work( iwork ),
1341 $ info )
1342*
1343 END IF
1344*
1345 ELSE IF( wntvas ) THEN
1346*
1347* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1348* or 'A')
1349* N left singular vectors to be computed in U and
1350* N right singular vectors to be computed in VT
1351*
1352 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1353*
1354* Sufficient workspace for a fast algorithm
1355*
1356 iu = 1
1357 IF( lwork.GE.wrkbl+lda*n ) THEN
1358*
1359* WORK(IU) is LDA by N
1360*
1361 ldwrku = lda
1362 ELSE
1363*
1364* WORK(IU) is N by N
1365*
1366 ldwrku = n
1367 END IF
1368 itau = iu + ldwrku*n
1369 iwork = itau + n
1370*
1371* Compute A=Q*R
1372* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1373*
1374 CALL sgeqrf( m, n, a, lda, work( itau ),
1375 $ work( iwork ), lwork-iwork+1, ierr )
1376*
1377* Copy R to WORK(IU), zeroing out below it
1378*
1379 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1380 $ ldwrku )
1381 CALL slaset( 'L', n-1, n-1, zero, zero,
1382 $ work( iu+1 ), ldwrku )
1383*
1384* Generate Q in A
1385* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1386*
1387 CALL sorgqr( m, n, n, a, lda, work( itau ),
1388 $ work( iwork ), lwork-iwork+1, ierr )
1389 ie = itau
1390 itauq = ie + n
1391 itaup = itauq + n
1392 iwork = itaup + n
1393*
1394* Bidiagonalize R in WORK(IU), copying result to VT
1395* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1396*
1397 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1398 $ work( ie ), work( itauq ),
1399 $ work( itaup ), work( iwork ),
1400 $ lwork-iwork+1, ierr )
1401 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1402 $ ldvt )
1403*
1404* Generate left bidiagonalizing vectors in WORK(IU)
1405* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1406*
1407 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1408 $ work( itauq ), work( iwork ),
1409 $ lwork-iwork+1, ierr )
1410*
1411* Generate right bidiagonalizing vectors in VT
1412* (Workspace: need N*N+4*N-1,
1413* prefer N*N+3*N+(N-1)*NB)
1414*
1415 CALL sorgbr( 'P', n, n, n, vt, ldvt,
1416 $ work( itaup ),
1417 $ work( iwork ), lwork-iwork+1, ierr )
1418 iwork = ie + n
1419*
1420* Perform bidiagonal QR iteration, computing left
1421* singular vectors of R in WORK(IU) and computing
1422* right singular vectors of R in VT
1423* (Workspace: need N*N+BDSPAC)
1424*
1425 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1426 $ ldvt, work( iu ), ldwrku, dum, 1,
1427 $ work( iwork ), info )
1428*
1429* Multiply Q in A by left singular vectors of R in
1430* WORK(IU), storing result in U
1431* (Workspace: need N*N)
1432*
1433 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1434 $ work( iu ), ldwrku, zero, u, ldu )
1435*
1436 ELSE
1437*
1438* Insufficient workspace for a fast algorithm
1439*
1440 itau = 1
1441 iwork = itau + n
1442*
1443* Compute A=Q*R, copying result to U
1444* (Workspace: need 2*N, prefer N+N*NB)
1445*
1446 CALL sgeqrf( m, n, a, lda, work( itau ),
1447 $ work( iwork ), lwork-iwork+1, ierr )
1448 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1449*
1450* Generate Q in U
1451* (Workspace: need 2*N, prefer N+N*NB)
1452*
1453 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1454 $ work( iwork ), lwork-iwork+1, ierr )
1455*
1456* Copy R to VT, zeroing out below it
1457*
1458 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1459 IF( n.GT.1 )
1460 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1461 $ vt( 2, 1 ), ldvt )
1462 ie = itau
1463 itauq = ie + n
1464 itaup = itauq + n
1465 iwork = itaup + n
1466*
1467* Bidiagonalize R in VT
1468* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1469*
1470 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1471 $ work( itauq ), work( itaup ),
1472 $ work( iwork ), lwork-iwork+1, ierr )
1473*
1474* Multiply Q in U by left bidiagonalizing vectors
1475* in VT
1476* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1477*
1478 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1479 $ work( itauq ), u, ldu, work( iwork ),
1480 $ lwork-iwork+1, ierr )
1481*
1482* Generate right bidiagonalizing vectors in VT
1483* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1484*
1485 CALL sorgbr( 'P', n, n, n, vt, ldvt,
1486 $ work( itaup ),
1487 $ work( iwork ), lwork-iwork+1, ierr )
1488 iwork = ie + n
1489*
1490* Perform bidiagonal QR iteration, computing left
1491* singular vectors of A in U and computing right
1492* singular vectors of A in VT
1493* (Workspace: need BDSPAC)
1494*
1495 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1496 $ ldvt, u, ldu, dum, 1, work( iwork ),
1497 $ info )
1498*
1499 END IF
1500*
1501 END IF
1502*
1503 ELSE IF( wntua ) THEN
1504*
1505 IF( wntvn ) THEN
1506*
1507* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1508* M left singular vectors to be computed in U and
1509* no right singular vectors to be computed
1510*
1511 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1512*
1513* Sufficient workspace for a fast algorithm
1514*
1515 ir = 1
1516 IF( lwork.GE.wrkbl+lda*n ) THEN
1517*
1518* WORK(IR) is LDA by N
1519*
1520 ldwrkr = lda
1521 ELSE
1522*
1523* WORK(IR) is N by N
1524*
1525 ldwrkr = n
1526 END IF
1527 itau = ir + ldwrkr*n
1528 iwork = itau + n
1529*
1530* Compute A=Q*R, copying result to U
1531* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1532*
1533 CALL sgeqrf( m, n, a, lda, work( itau ),
1534 $ work( iwork ), lwork-iwork+1, ierr )
1535 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1536*
1537* Copy R to WORK(IR), zeroing out below it
1538*
1539 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1540 $ ldwrkr )
1541 CALL slaset( 'L', n-1, n-1, zero, zero,
1542 $ work( ir+1 ), ldwrkr )
1543*
1544* Generate Q in U
1545* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1546*
1547 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1548 $ work( iwork ), lwork-iwork+1, ierr )
1549 ie = itau
1550 itauq = ie + n
1551 itaup = itauq + n
1552 iwork = itaup + n
1553*
1554* Bidiagonalize R in WORK(IR)
1555* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1556*
1557 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1558 $ work( ie ), work( itauq ),
1559 $ work( itaup ), work( iwork ),
1560 $ lwork-iwork+1, ierr )
1561*
1562* Generate left bidiagonalizing vectors in WORK(IR)
1563* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1564*
1565 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1566 $ work( itauq ), work( iwork ),
1567 $ lwork-iwork+1, ierr )
1568 iwork = ie + n
1569*
1570* Perform bidiagonal QR iteration, computing left
1571* singular vectors of R in WORK(IR)
1572* (Workspace: need N*N+BDSPAC)
1573*
1574 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ),
1575 $ dum,
1576 $ 1, work( ir ), ldwrkr, dum, 1,
1577 $ work( iwork ), info )
1578*
1579* Multiply Q in U by left singular vectors of R in
1580* WORK(IR), storing result in A
1581* (Workspace: need N*N)
1582*
1583 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1584 $ work( ir ), ldwrkr, zero, a, lda )
1585*
1586* Copy left singular vectors of A from A to U
1587*
1588 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1589*
1590 ELSE
1591*
1592* Insufficient workspace for a fast algorithm
1593*
1594 itau = 1
1595 iwork = itau + n
1596*
1597* Compute A=Q*R, copying result to U
1598* (Workspace: need 2*N, prefer N+N*NB)
1599*
1600 CALL sgeqrf( m, n, a, lda, work( itau ),
1601 $ work( iwork ), lwork-iwork+1, ierr )
1602 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1603*
1604* Generate Q in U
1605* (Workspace: need N+M, prefer N+M*NB)
1606*
1607 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1608 $ work( iwork ), lwork-iwork+1, ierr )
1609 ie = itau
1610 itauq = ie + n
1611 itaup = itauq + n
1612 iwork = itaup + n
1613*
1614* Zero out below R in A
1615*
1616 IF( n .GT. 1 ) THEN
1617 CALL slaset( 'L', n-1, n-1, zero, zero,
1618 $ a( 2, 1 ), lda )
1619 END IF
1620*
1621* Bidiagonalize R in A
1622* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1623*
1624 CALL sgebrd( n, n, a, lda, s, work( ie ),
1625 $ work( itauq ), work( itaup ),
1626 $ work( iwork ), lwork-iwork+1, ierr )
1627*
1628* Multiply Q in U by left bidiagonalizing vectors
1629* in A
1630* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1631*
1632 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1633 $ work( itauq ), u, ldu, work( iwork ),
1634 $ lwork-iwork+1, ierr )
1635 iwork = ie + n
1636*
1637* Perform bidiagonal QR iteration, computing left
1638* singular vectors of A in U
1639* (Workspace: need BDSPAC)
1640*
1641 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ),
1642 $ dum,
1643 $ 1, u, ldu, dum, 1, work( iwork ),
1644 $ info )
1645*
1646 END IF
1647*
1648 ELSE IF( wntvo ) THEN
1649*
1650* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1651* M left singular vectors to be computed in U and
1652* N right singular vectors to be overwritten on A
1653*
1654 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1655*
1656* Sufficient workspace for a fast algorithm
1657*
1658 iu = 1
1659 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1660*
1661* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1662*
1663 ldwrku = lda
1664 ir = iu + ldwrku*n
1665 ldwrkr = lda
1666 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1667*
1668* WORK(IU) is LDA by N and WORK(IR) is N by N
1669*
1670 ldwrku = lda
1671 ir = iu + ldwrku*n
1672 ldwrkr = n
1673 ELSE
1674*
1675* WORK(IU) is N by N and WORK(IR) is N by N
1676*
1677 ldwrku = n
1678 ir = iu + ldwrku*n
1679 ldwrkr = n
1680 END IF
1681 itau = ir + ldwrkr*n
1682 iwork = itau + n
1683*
1684* Compute A=Q*R, copying result to U
1685* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1686*
1687 CALL sgeqrf( m, n, a, lda, work( itau ),
1688 $ work( iwork ), lwork-iwork+1, ierr )
1689 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1690*
1691* Generate Q in U
1692* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1693*
1694 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1695 $ work( iwork ), lwork-iwork+1, ierr )
1696*
1697* Copy R to WORK(IU), zeroing out below it
1698*
1699 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1700 $ ldwrku )
1701 CALL slaset( 'L', n-1, n-1, zero, zero,
1702 $ work( iu+1 ), ldwrku )
1703 ie = itau
1704 itauq = ie + n
1705 itaup = itauq + n
1706 iwork = itaup + n
1707*
1708* Bidiagonalize R in WORK(IU), copying result to
1709* WORK(IR)
1710* (Workspace: need 2*N*N+4*N,
1711* prefer 2*N*N+3*N+2*N*NB)
1712*
1713 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1714 $ work( ie ), work( itauq ),
1715 $ work( itaup ), work( iwork ),
1716 $ lwork-iwork+1, ierr )
1717 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1718 $ work( ir ), ldwrkr )
1719*
1720* Generate left bidiagonalizing vectors in WORK(IU)
1721* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1722*
1723 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1724 $ work( itauq ), work( iwork ),
1725 $ lwork-iwork+1, ierr )
1726*
1727* Generate right bidiagonalizing vectors in WORK(IR)
1728* (Workspace: need 2*N*N+4*N-1,
1729* prefer 2*N*N+3*N+(N-1)*NB)
1730*
1731 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1732 $ work( itaup ), work( iwork ),
1733 $ lwork-iwork+1, ierr )
1734 iwork = ie + n
1735*
1736* Perform bidiagonal QR iteration, computing left
1737* singular vectors of R in WORK(IU) and computing
1738* right singular vectors of R in WORK(IR)
1739* (Workspace: need 2*N*N+BDSPAC)
1740*
1741 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1742 $ work( ir ), ldwrkr, work( iu ),
1743 $ ldwrku, dum, 1, work( iwork ), info )
1744*
1745* Multiply Q in U by left singular vectors of R in
1746* WORK(IU), storing result in A
1747* (Workspace: need N*N)
1748*
1749 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1750 $ work( iu ), ldwrku, zero, a, lda )
1751*
1752* Copy left singular vectors of A from A to U
1753*
1754 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1755*
1756* Copy right singular vectors of R from WORK(IR) to A
1757*
1758 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1759 $ lda )
1760*
1761 ELSE
1762*
1763* Insufficient workspace for a fast algorithm
1764*
1765 itau = 1
1766 iwork = itau + n
1767*
1768* Compute A=Q*R, copying result to U
1769* (Workspace: need 2*N, prefer N+N*NB)
1770*
1771 CALL sgeqrf( m, n, a, lda, work( itau ),
1772 $ work( iwork ), lwork-iwork+1, ierr )
1773 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1774*
1775* Generate Q in U
1776* (Workspace: need N+M, prefer N+M*NB)
1777*
1778 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1779 $ work( iwork ), lwork-iwork+1, ierr )
1780 ie = itau
1781 itauq = ie + n
1782 itaup = itauq + n
1783 iwork = itaup + n
1784*
1785* Zero out below R in A
1786*
1787 IF( n .GT. 1 ) THEN
1788 CALL slaset( 'L', n-1, n-1, zero, zero,
1789 $ a( 2, 1 ), lda )
1790 END IF
1791*
1792* Bidiagonalize R in A
1793* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1794*
1795 CALL sgebrd( n, n, a, lda, s, work( ie ),
1796 $ work( itauq ), work( itaup ),
1797 $ work( iwork ), lwork-iwork+1, ierr )
1798*
1799* Multiply Q in U by left bidiagonalizing vectors
1800* in A
1801* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1802*
1803 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1804 $ work( itauq ), u, ldu, work( iwork ),
1805 $ lwork-iwork+1, ierr )
1806*
1807* Generate right bidiagonalizing vectors in A
1808* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1809*
1810 CALL sorgbr( 'P', n, n, n, a, lda,
1811 $ work( itaup ),
1812 $ work( iwork ), lwork-iwork+1, ierr )
1813 iwork = ie + n
1814*
1815* Perform bidiagonal QR iteration, computing left
1816* singular vectors of A in U and computing right
1817* singular vectors of A in A
1818* (Workspace: need BDSPAC)
1819*
1820 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1821 $ lda, u, ldu, dum, 1, work( iwork ),
1822 $ info )
1823*
1824 END IF
1825*
1826 ELSE IF( wntvas ) THEN
1827*
1828* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1829* or 'A')
1830* M left singular vectors to be computed in U and
1831* N right singular vectors to be computed in VT
1832*
1833 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1834*
1835* Sufficient workspace for a fast algorithm
1836*
1837 iu = 1
1838 IF( lwork.GE.wrkbl+lda*n ) THEN
1839*
1840* WORK(IU) is LDA by N
1841*
1842 ldwrku = lda
1843 ELSE
1844*
1845* WORK(IU) is N by N
1846*
1847 ldwrku = n
1848 END IF
1849 itau = iu + ldwrku*n
1850 iwork = itau + n
1851*
1852* Compute A=Q*R, copying result to U
1853* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1854*
1855 CALL sgeqrf( m, n, a, lda, work( itau ),
1856 $ work( iwork ), lwork-iwork+1, ierr )
1857 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1858*
1859* Generate Q in U
1860* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1861*
1862 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1863 $ work( iwork ), lwork-iwork+1, ierr )
1864*
1865* Copy R to WORK(IU), zeroing out below it
1866*
1867 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1868 $ ldwrku )
1869 CALL slaset( 'L', n-1, n-1, zero, zero,
1870 $ work( iu+1 ), ldwrku )
1871 ie = itau
1872 itauq = ie + n
1873 itaup = itauq + n
1874 iwork = itaup + n
1875*
1876* Bidiagonalize R in WORK(IU), copying result to VT
1877* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1878*
1879 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1880 $ work( ie ), work( itauq ),
1881 $ work( itaup ), work( iwork ),
1882 $ lwork-iwork+1, ierr )
1883 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1884 $ ldvt )
1885*
1886* Generate left bidiagonalizing vectors in WORK(IU)
1887* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1888*
1889 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1890 $ work( itauq ), work( iwork ),
1891 $ lwork-iwork+1, ierr )
1892*
1893* Generate right bidiagonalizing vectors in VT
1894* (Workspace: need N*N+4*N-1,
1895* prefer N*N+3*N+(N-1)*NB)
1896*
1897 CALL sorgbr( 'P', n, n, n, vt, ldvt,
1898 $ work( itaup ),
1899 $ work( iwork ), lwork-iwork+1, ierr )
1900 iwork = ie + n
1901*
1902* Perform bidiagonal QR iteration, computing left
1903* singular vectors of R in WORK(IU) and computing
1904* right singular vectors of R in VT
1905* (Workspace: need N*N+BDSPAC)
1906*
1907 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1908 $ ldvt, work( iu ), ldwrku, dum, 1,
1909 $ work( iwork ), info )
1910*
1911* Multiply Q in U by left singular vectors of R in
1912* WORK(IU), storing result in A
1913* (Workspace: need N*N)
1914*
1915 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1916 $ work( iu ), ldwrku, zero, a, lda )
1917*
1918* Copy left singular vectors of A from A to U
1919*
1920 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1921*
1922 ELSE
1923*
1924* Insufficient workspace for a fast algorithm
1925*
1926 itau = 1
1927 iwork = itau + n
1928*
1929* Compute A=Q*R, copying result to U
1930* (Workspace: need 2*N, prefer N+N*NB)
1931*
1932 CALL sgeqrf( m, n, a, lda, work( itau ),
1933 $ work( iwork ), lwork-iwork+1, ierr )
1934 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1935*
1936* Generate Q in U
1937* (Workspace: need N+M, prefer N+M*NB)
1938*
1939 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1940 $ work( iwork ), lwork-iwork+1, ierr )
1941*
1942* Copy R from A to VT, zeroing out below it
1943*
1944 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1945 IF( n.GT.1 )
1946 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1947 $ vt( 2, 1 ), ldvt )
1948 ie = itau
1949 itauq = ie + n
1950 itaup = itauq + n
1951 iwork = itaup + n
1952*
1953* Bidiagonalize R in VT
1954* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1955*
1956 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1957 $ work( itauq ), work( itaup ),
1958 $ work( iwork ), lwork-iwork+1, ierr )
1959*
1960* Multiply Q in U by left bidiagonalizing vectors
1961* in VT
1962* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1963*
1964 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1965 $ work( itauq ), u, ldu, work( iwork ),
1966 $ lwork-iwork+1, ierr )
1967*
1968* Generate right bidiagonalizing vectors in VT
1969* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1970*
1971 CALL sorgbr( 'P', n, n, n, vt, ldvt,
1972 $ work( itaup ),
1973 $ work( iwork ), lwork-iwork+1, ierr )
1974 iwork = ie + n
1975*
1976* Perform bidiagonal QR iteration, computing left
1977* singular vectors of A in U and computing right
1978* singular vectors of A in VT
1979* (Workspace: need BDSPAC)
1980*
1981 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1982 $ ldvt, u, ldu, dum, 1, work( iwork ),
1983 $ info )
1984*
1985 END IF
1986*
1987 END IF
1988*
1989 END IF
1990*
1991 ELSE
1992*
1993* M .LT. MNTHR
1994*
1995* Path 10 (M at least N, but not much larger)
1996* Reduce to bidiagonal form without QR decomposition
1997*
1998 ie = 1
1999 itauq = ie + n
2000 itaup = itauq + n
2001 iwork = itaup + n
2002*
2003* Bidiagonalize A
2004* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
2005*
2006 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
2007 $ work( itaup ), work( iwork ), lwork-iwork+1,
2008 $ ierr )
2009 IF( wntuas ) THEN
2010*
2011* If left singular vectors desired in U, copy result to U
2012* and generate left bidiagonalizing vectors in U
2013* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
2014*
2015 CALL slacpy( 'L', m, n, a, lda, u, ldu )
2016 IF( wntus )
2017 $ ncu = n
2018 IF( wntua )
2019 $ ncu = m
2020 CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2021 $ work( iwork ), lwork-iwork+1, ierr )
2022 END IF
2023 IF( wntvas ) THEN
2024*
2025* If right singular vectors desired in VT, copy result to
2026* VT and generate right bidiagonalizing vectors in VT
2027* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2028*
2029 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2030 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2031 $ work( iwork ), lwork-iwork+1, ierr )
2032 END IF
2033 IF( wntuo ) THEN
2034*
2035* If left singular vectors desired in A, generate left
2036* bidiagonalizing vectors in A
2037* (Workspace: need 4*N, prefer 3*N+N*NB)
2038*
2039 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2040 $ work( iwork ), lwork-iwork+1, ierr )
2041 END IF
2042 IF( wntvo ) THEN
2043*
2044* If right singular vectors desired in A, generate right
2045* bidiagonalizing vectors in A
2046* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2047*
2048 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2049 $ work( iwork ), lwork-iwork+1, ierr )
2050 END IF
2051 iwork = ie + n
2052 IF( wntuas .OR. wntuo )
2053 $ nru = m
2054 IF( wntun )
2055 $ nru = 0
2056 IF( wntvas .OR. wntvo )
2057 $ ncvt = n
2058 IF( wntvn )
2059 $ ncvt = 0
2060 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2061*
2062* Perform bidiagonal QR iteration, if desired, computing
2063* left singular vectors in U and computing right singular
2064* vectors in VT
2065* (Workspace: need BDSPAC)
2066*
2067 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2068 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2069 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2070*
2071* Perform bidiagonal QR iteration, if desired, computing
2072* left singular vectors in U and computing right singular
2073* vectors in A
2074* (Workspace: need BDSPAC)
2075*
2076 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a,
2077 $ lda,
2078 $ u, ldu, dum, 1, work( iwork ), info )
2079 ELSE
2080*
2081* Perform bidiagonal QR iteration, if desired, computing
2082* left singular vectors in A and computing right singular
2083* vectors in VT
2084* (Workspace: need BDSPAC)
2085*
2086 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2087 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2088 END IF
2089*
2090 END IF
2091*
2092 ELSE
2093*
2094* A has more columns than rows. If A has sufficiently more
2095* columns than rows, first reduce using the LQ decomposition (if
2096* sufficient workspace available)
2097*
2098 IF( n.GE.mnthr ) THEN
2099*
2100 IF( wntvn ) THEN
2101*
2102* Path 1t(N much larger than M, JOBVT='N')
2103* No right singular vectors to be computed
2104*
2105 itau = 1
2106 iwork = itau + m
2107*
2108* Compute A=L*Q
2109* (Workspace: need 2*M, prefer M+M*NB)
2110*
2111 CALL sgelqf( m, n, a, lda, work( itau ),
2112 $ work( iwork ),
2113 $ lwork-iwork+1, ierr )
2114*
2115* Zero out above L
2116*
2117 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2118 $ lda )
2119 ie = 1
2120 itauq = ie + m
2121 itaup = itauq + m
2122 iwork = itaup + m
2123*
2124* Bidiagonalize L in A
2125* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2126*
2127 CALL sgebrd( m, m, a, lda, s, work( ie ),
2128 $ work( itauq ),
2129 $ work( itaup ), work( iwork ), lwork-iwork+1,
2130 $ ierr )
2131 IF( wntuo .OR. wntuas ) THEN
2132*
2133* If left singular vectors desired, generate Q
2134* (Workspace: need 4*M, prefer 3*M+M*NB)
2135*
2136 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2137 $ work( iwork ), lwork-iwork+1, ierr )
2138 END IF
2139 iwork = ie + m
2140 nru = 0
2141 IF( wntuo .OR. wntuas )
2142 $ nru = m
2143*
2144* Perform bidiagonal QR iteration, computing left singular
2145* vectors of A in A if desired
2146* (Workspace: need BDSPAC)
2147*
2148 CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1,
2149 $ a,
2150 $ lda, dum, 1, work( iwork ), info )
2151*
2152* If left singular vectors desired in U, copy them there
2153*
2154 IF( wntuas )
2155 $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2156*
2157 ELSE IF( wntvo .AND. wntun ) THEN
2158*
2159* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2160* M right singular vectors to be overwritten on A and
2161* no left singular vectors to be computed
2162*
2163 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2164*
2165* Sufficient workspace for a fast algorithm
2166*
2167 ir = 1
2168 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2169*
2170* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2171*
2172 ldwrku = lda
2173 chunk = n
2174 ldwrkr = lda
2175 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2176*
2177* WORK(IU) is LDA by N and WORK(IR) is M by M
2178*
2179 ldwrku = lda
2180 chunk = n
2181 ldwrkr = m
2182 ELSE
2183*
2184* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2185*
2186 ldwrku = m
2187 chunk = ( lwork-m*m-m ) / m
2188 ldwrkr = m
2189 END IF
2190 itau = ir + ldwrkr*m
2191 iwork = itau + m
2192*
2193* Compute A=L*Q
2194* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2195*
2196 CALL sgelqf( m, n, a, lda, work( itau ),
2197 $ work( iwork ), lwork-iwork+1, ierr )
2198*
2199* Copy L to WORK(IR) and zero out above it
2200*
2201 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2202 $ ldwrkr )
2203 CALL slaset( 'U', m-1, m-1, zero, zero,
2204 $ work( ir+ldwrkr ), ldwrkr )
2205*
2206* Generate Q in A
2207* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2208*
2209 CALL sorglq( m, n, m, a, lda, work( itau ),
2210 $ work( iwork ), lwork-iwork+1, ierr )
2211 ie = itau
2212 itauq = ie + m
2213 itaup = itauq + m
2214 iwork = itaup + m
2215*
2216* Bidiagonalize L in WORK(IR)
2217* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2218*
2219 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2220 $ work( ie ),
2221 $ work( itauq ), work( itaup ),
2222 $ work( iwork ), lwork-iwork+1, ierr )
2223*
2224* Generate right vectors bidiagonalizing L
2225* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2226*
2227 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2228 $ work( itaup ), work( iwork ),
2229 $ lwork-iwork+1, ierr )
2230 iwork = ie + m
2231*
2232* Perform bidiagonal QR iteration, computing right
2233* singular vectors of L in WORK(IR)
2234* (Workspace: need M*M+BDSPAC)
2235*
2236 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2237 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2238 $ work( iwork ), info )
2239 iu = ie + m
2240*
2241* Multiply right singular vectors of L in WORK(IR) by Q
2242* in A, storing result in WORK(IU) and copying to A
2243* (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2244*
2245 DO 30 i = 1, n, chunk
2246 blk = min( n-i+1, chunk )
2247 CALL sgemm( 'N', 'N', m, blk, m, one,
2248 $ work( ir ),
2249 $ ldwrkr, a( 1, i ), lda, zero,
2250 $ work( iu ), ldwrku )
2251 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2252 $ a( 1, i ), lda )
2253 30 CONTINUE
2254*
2255 ELSE
2256*
2257* Insufficient workspace for a fast algorithm
2258*
2259 ie = 1
2260 itauq = ie + m
2261 itaup = itauq + m
2262 iwork = itaup + m
2263*
2264* Bidiagonalize A
2265* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2266*
2267 CALL sgebrd( m, n, a, lda, s, work( ie ),
2268 $ work( itauq ), work( itaup ),
2269 $ work( iwork ), lwork-iwork+1, ierr )
2270*
2271* Generate right vectors bidiagonalizing A
2272* (Workspace: need 4*M, prefer 3*M+M*NB)
2273*
2274 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2275 $ work( iwork ), lwork-iwork+1, ierr )
2276 iwork = ie + m
2277*
2278* Perform bidiagonal QR iteration, computing right
2279* singular vectors of A in A
2280* (Workspace: need BDSPAC)
2281*
2282 CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a,
2283 $ lda,
2284 $ dum, 1, dum, 1, work( iwork ), info )
2285*
2286 END IF
2287*
2288 ELSE IF( wntvo .AND. wntuas ) THEN
2289*
2290* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2291* M right singular vectors to be overwritten on A and
2292* M left singular vectors to be computed in U
2293*
2294 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2295*
2296* Sufficient workspace for a fast algorithm
2297*
2298 ir = 1
2299 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2300*
2301* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2302*
2303 ldwrku = lda
2304 chunk = n
2305 ldwrkr = lda
2306 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2307*
2308* WORK(IU) is LDA by N and WORK(IR) is M by M
2309*
2310 ldwrku = lda
2311 chunk = n
2312 ldwrkr = m
2313 ELSE
2314*
2315* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2316*
2317 ldwrku = m
2318 chunk = ( lwork-m*m-m ) / m
2319 ldwrkr = m
2320 END IF
2321 itau = ir + ldwrkr*m
2322 iwork = itau + m
2323*
2324* Compute A=L*Q
2325* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2326*
2327 CALL sgelqf( m, n, a, lda, work( itau ),
2328 $ work( iwork ), lwork-iwork+1, ierr )
2329*
2330* Copy L to U, zeroing about above it
2331*
2332 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2333 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2334 $ ldu )
2335*
2336* Generate Q in A
2337* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2338*
2339 CALL sorglq( m, n, m, a, lda, work( itau ),
2340 $ work( iwork ), lwork-iwork+1, ierr )
2341 ie = itau
2342 itauq = ie + m
2343 itaup = itauq + m
2344 iwork = itaup + m
2345*
2346* Bidiagonalize L in U, copying result to WORK(IR)
2347* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2348*
2349 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2350 $ work( itauq ), work( itaup ),
2351 $ work( iwork ), lwork-iwork+1, ierr )
2352 CALL slacpy( 'U', m, m, u, ldu, work( ir ),
2353 $ ldwrkr )
2354*
2355* Generate right vectors bidiagonalizing L in WORK(IR)
2356* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2357*
2358 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2359 $ work( itaup ), work( iwork ),
2360 $ lwork-iwork+1, ierr )
2361*
2362* Generate left vectors bidiagonalizing L in U
2363* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2364*
2365 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2366 $ work( iwork ), lwork-iwork+1, ierr )
2367 iwork = ie + m
2368*
2369* Perform bidiagonal QR iteration, computing left
2370* singular vectors of L in U, and computing right
2371* singular vectors of L in WORK(IR)
2372* (Workspace: need M*M+BDSPAC)
2373*
2374 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2375 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2376 $ work( iwork ), info )
2377 iu = ie + m
2378*
2379* Multiply right singular vectors of L in WORK(IR) by Q
2380* in A, storing result in WORK(IU) and copying to A
2381* (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2382*
2383 DO 40 i = 1, n, chunk
2384 blk = min( n-i+1, chunk )
2385 CALL sgemm( 'N', 'N', m, blk, m, one,
2386 $ work( ir ),
2387 $ ldwrkr, a( 1, i ), lda, zero,
2388 $ work( iu ), ldwrku )
2389 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2390 $ a( 1, i ), lda )
2391 40 CONTINUE
2392*
2393 ELSE
2394*
2395* Insufficient workspace for a fast algorithm
2396*
2397 itau = 1
2398 iwork = itau + m
2399*
2400* Compute A=L*Q
2401* (Workspace: need 2*M, prefer M+M*NB)
2402*
2403 CALL sgelqf( m, n, a, lda, work( itau ),
2404 $ work( iwork ), lwork-iwork+1, ierr )
2405*
2406* Copy L to U, zeroing out above it
2407*
2408 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2409 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2410 $ ldu )
2411*
2412* Generate Q in A
2413* (Workspace: need 2*M, prefer M+M*NB)
2414*
2415 CALL sorglq( m, n, m, a, lda, work( itau ),
2416 $ work( iwork ), lwork-iwork+1, ierr )
2417 ie = itau
2418 itauq = ie + m
2419 itaup = itauq + m
2420 iwork = itaup + m
2421*
2422* Bidiagonalize L in U
2423* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2424*
2425 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2426 $ work( itauq ), work( itaup ),
2427 $ work( iwork ), lwork-iwork+1, ierr )
2428*
2429* Multiply right vectors bidiagonalizing L by Q in A
2430* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2431*
2432 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2433 $ work( itaup ), a, lda, work( iwork ),
2434 $ lwork-iwork+1, ierr )
2435*
2436* Generate left vectors bidiagonalizing L in U
2437* (Workspace: need 4*M, prefer 3*M+M*NB)
2438*
2439 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2440 $ work( iwork ), lwork-iwork+1, ierr )
2441 iwork = ie + m
2442*
2443* Perform bidiagonal QR iteration, computing left
2444* singular vectors of A in U and computing right
2445* singular vectors of A in A
2446* (Workspace: need BDSPAC)
2447*
2448 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a,
2449 $ lda,
2450 $ u, ldu, dum, 1, work( iwork ), info )
2451*
2452 END IF
2453*
2454 ELSE IF( wntvs ) THEN
2455*
2456 IF( wntun ) THEN
2457*
2458* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2459* M right singular vectors to be computed in VT and
2460* no left singular vectors to be computed
2461*
2462 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2463*
2464* Sufficient workspace for a fast algorithm
2465*
2466 ir = 1
2467 IF( lwork.GE.wrkbl+lda*m ) THEN
2468*
2469* WORK(IR) is LDA by M
2470*
2471 ldwrkr = lda
2472 ELSE
2473*
2474* WORK(IR) is M by M
2475*
2476 ldwrkr = m
2477 END IF
2478 itau = ir + ldwrkr*m
2479 iwork = itau + m
2480*
2481* Compute A=L*Q
2482* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2483*
2484 CALL sgelqf( m, n, a, lda, work( itau ),
2485 $ work( iwork ), lwork-iwork+1, ierr )
2486*
2487* Copy L to WORK(IR), zeroing out above it
2488*
2489 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2490 $ ldwrkr )
2491 CALL slaset( 'U', m-1, m-1, zero, zero,
2492 $ work( ir+ldwrkr ), ldwrkr )
2493*
2494* Generate Q in A
2495* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2496*
2497 CALL sorglq( m, n, m, a, lda, work( itau ),
2498 $ work( iwork ), lwork-iwork+1, ierr )
2499 ie = itau
2500 itauq = ie + m
2501 itaup = itauq + m
2502 iwork = itaup + m
2503*
2504* Bidiagonalize L in WORK(IR)
2505* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2506*
2507 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2508 $ work( ie ), work( itauq ),
2509 $ work( itaup ), work( iwork ),
2510 $ lwork-iwork+1, ierr )
2511*
2512* Generate right vectors bidiagonalizing L in
2513* WORK(IR)
2514* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2515*
2516 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2517 $ work( itaup ), work( iwork ),
2518 $ lwork-iwork+1, ierr )
2519 iwork = ie + m
2520*
2521* Perform bidiagonal QR iteration, computing right
2522* singular vectors of L in WORK(IR)
2523* (Workspace: need M*M+BDSPAC)
2524*
2525 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2526 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2527 $ work( iwork ), info )
2528*
2529* Multiply right singular vectors of L in WORK(IR) by
2530* Q in A, storing result in VT
2531* (Workspace: need M*M)
2532*
2533 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2534 $ ldwrkr, a, lda, zero, vt, ldvt )
2535*
2536 ELSE
2537*
2538* Insufficient workspace for a fast algorithm
2539*
2540 itau = 1
2541 iwork = itau + m
2542*
2543* Compute A=L*Q
2544* (Workspace: need 2*M, prefer M+M*NB)
2545*
2546 CALL sgelqf( m, n, a, lda, work( itau ),
2547 $ work( iwork ), lwork-iwork+1, ierr )
2548*
2549* Copy result to VT
2550*
2551 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2552*
2553* Generate Q in VT
2554* (Workspace: need 2*M, prefer M+M*NB)
2555*
2556 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2557 $ work( iwork ), lwork-iwork+1, ierr )
2558 ie = itau
2559 itauq = ie + m
2560 itaup = itauq + m
2561 iwork = itaup + m
2562*
2563* Zero out above L in A
2564*
2565 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1,
2566 $ 2 ),
2567 $ lda )
2568*
2569* Bidiagonalize L in A
2570* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2571*
2572 CALL sgebrd( m, m, a, lda, s, work( ie ),
2573 $ work( itauq ), work( itaup ),
2574 $ work( iwork ), lwork-iwork+1, ierr )
2575*
2576* Multiply right vectors bidiagonalizing L by Q in VT
2577* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2578*
2579 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2580 $ work( itaup ), vt, ldvt,
2581 $ work( iwork ), lwork-iwork+1, ierr )
2582 iwork = ie + m
2583*
2584* Perform bidiagonal QR iteration, computing right
2585* singular vectors of A in VT
2586* (Workspace: need BDSPAC)
2587*
2588 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2589 $ ldvt, dum, 1, dum, 1, work( iwork ),
2590 $ info )
2591*
2592 END IF
2593*
2594 ELSE IF( wntuo ) THEN
2595*
2596* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2597* M right singular vectors to be computed in VT and
2598* M left singular vectors to be overwritten on A
2599*
2600 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2601*
2602* Sufficient workspace for a fast algorithm
2603*
2604 iu = 1
2605 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2606*
2607* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2608*
2609 ldwrku = lda
2610 ir = iu + ldwrku*m
2611 ldwrkr = lda
2612 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2613*
2614* WORK(IU) is LDA by M and WORK(IR) is M by M
2615*
2616 ldwrku = lda
2617 ir = iu + ldwrku*m
2618 ldwrkr = m
2619 ELSE
2620*
2621* WORK(IU) is M by M and WORK(IR) is M by M
2622*
2623 ldwrku = m
2624 ir = iu + ldwrku*m
2625 ldwrkr = m
2626 END IF
2627 itau = ir + ldwrkr*m
2628 iwork = itau + m
2629*
2630* Compute A=L*Q
2631* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2632*
2633 CALL sgelqf( m, n, a, lda, work( itau ),
2634 $ work( iwork ), lwork-iwork+1, ierr )
2635*
2636* Copy L to WORK(IU), zeroing out below it
2637*
2638 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2639 $ ldwrku )
2640 CALL slaset( 'U', m-1, m-1, zero, zero,
2641 $ work( iu+ldwrku ), ldwrku )
2642*
2643* Generate Q in A
2644* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2645*
2646 CALL sorglq( m, n, m, a, lda, work( itau ),
2647 $ work( iwork ), lwork-iwork+1, ierr )
2648 ie = itau
2649 itauq = ie + m
2650 itaup = itauq + m
2651 iwork = itaup + m
2652*
2653* Bidiagonalize L in WORK(IU), copying result to
2654* WORK(IR)
2655* (Workspace: need 2*M*M+4*M,
2656* prefer 2*M*M+3*M+2*M*NB)
2657*
2658 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2659 $ work( ie ), work( itauq ),
2660 $ work( itaup ), work( iwork ),
2661 $ lwork-iwork+1, ierr )
2662 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2663 $ work( ir ), ldwrkr )
2664*
2665* Generate right bidiagonalizing vectors in WORK(IU)
2666* (Workspace: need 2*M*M+4*M-1,
2667* prefer 2*M*M+3*M+(M-1)*NB)
2668*
2669 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2670 $ work( itaup ), work( iwork ),
2671 $ lwork-iwork+1, ierr )
2672*
2673* Generate left bidiagonalizing vectors in WORK(IR)
2674* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2675*
2676 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2677 $ work( itauq ), work( iwork ),
2678 $ lwork-iwork+1, ierr )
2679 iwork = ie + m
2680*
2681* Perform bidiagonal QR iteration, computing left
2682* singular vectors of L in WORK(IR) and computing
2683* right singular vectors of L in WORK(IU)
2684* (Workspace: need 2*M*M+BDSPAC)
2685*
2686 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2687 $ work( iu ), ldwrku, work( ir ),
2688 $ ldwrkr, dum, 1, work( iwork ), info )
2689*
2690* Multiply right singular vectors of L in WORK(IU) by
2691* Q in A, storing result in VT
2692* (Workspace: need M*M)
2693*
2694 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2695 $ ldwrku, a, lda, zero, vt, ldvt )
2696*
2697* Copy left singular vectors of L to A
2698* (Workspace: need M*M)
2699*
2700 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2701 $ lda )
2702*
2703 ELSE
2704*
2705* Insufficient workspace for a fast algorithm
2706*
2707 itau = 1
2708 iwork = itau + m
2709*
2710* Compute A=L*Q, copying result to VT
2711* (Workspace: need 2*M, prefer M+M*NB)
2712*
2713 CALL sgelqf( m, n, a, lda, work( itau ),
2714 $ work( iwork ), lwork-iwork+1, ierr )
2715 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2716*
2717* Generate Q in VT
2718* (Workspace: need 2*M, prefer M+M*NB)
2719*
2720 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2721 $ work( iwork ), lwork-iwork+1, ierr )
2722 ie = itau
2723 itauq = ie + m
2724 itaup = itauq + m
2725 iwork = itaup + m
2726*
2727* Zero out above L in A
2728*
2729 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1,
2730 $ 2 ),
2731 $ lda )
2732*
2733* Bidiagonalize L in A
2734* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2735*
2736 CALL sgebrd( m, m, a, lda, s, work( ie ),
2737 $ work( itauq ), work( itaup ),
2738 $ work( iwork ), lwork-iwork+1, ierr )
2739*
2740* Multiply right vectors bidiagonalizing L by Q in VT
2741* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2742*
2743 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2744 $ work( itaup ), vt, ldvt,
2745 $ work( iwork ), lwork-iwork+1, ierr )
2746*
2747* Generate left bidiagonalizing vectors of L in A
2748* (Workspace: need 4*M, prefer 3*M+M*NB)
2749*
2750 CALL sorgbr( 'Q', m, m, m, a, lda,
2751 $ work( itauq ),
2752 $ work( iwork ), lwork-iwork+1, ierr )
2753 iwork = ie + m
2754*
2755* Perform bidiagonal QR iteration, compute left
2756* singular vectors of A in A and compute right
2757* singular vectors of A in VT
2758* (Workspace: need BDSPAC)
2759*
2760 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2761 $ ldvt, a, lda, dum, 1, work( iwork ),
2762 $ info )
2763*
2764 END IF
2765*
2766 ELSE IF( wntuas ) THEN
2767*
2768* Path 6t(N much larger than M, JOBU='S' or 'A',
2769* JOBVT='S')
2770* M right singular vectors to be computed in VT and
2771* M left singular vectors to be computed in U
2772*
2773 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2774*
2775* Sufficient workspace for a fast algorithm
2776*
2777 iu = 1
2778 IF( lwork.GE.wrkbl+lda*m ) THEN
2779*
2780* WORK(IU) is LDA by N
2781*
2782 ldwrku = lda
2783 ELSE
2784*
2785* WORK(IU) is LDA by M
2786*
2787 ldwrku = m
2788 END IF
2789 itau = iu + ldwrku*m
2790 iwork = itau + m
2791*
2792* Compute A=L*Q
2793* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2794*
2795 CALL sgelqf( m, n, a, lda, work( itau ),
2796 $ work( iwork ), lwork-iwork+1, ierr )
2797*
2798* Copy L to WORK(IU), zeroing out above it
2799*
2800 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2801 $ ldwrku )
2802 CALL slaset( 'U', m-1, m-1, zero, zero,
2803 $ work( iu+ldwrku ), ldwrku )
2804*
2805* Generate Q in A
2806* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2807*
2808 CALL sorglq( m, n, m, a, lda, work( itau ),
2809 $ work( iwork ), lwork-iwork+1, ierr )
2810 ie = itau
2811 itauq = ie + m
2812 itaup = itauq + m
2813 iwork = itaup + m
2814*
2815* Bidiagonalize L in WORK(IU), copying result to U
2816* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2817*
2818 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2819 $ work( ie ), work( itauq ),
2820 $ work( itaup ), work( iwork ),
2821 $ lwork-iwork+1, ierr )
2822 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2823 $ ldu )
2824*
2825* Generate right bidiagonalizing vectors in WORK(IU)
2826* (Workspace: need M*M+4*M-1,
2827* prefer M*M+3*M+(M-1)*NB)
2828*
2829 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2830 $ work( itaup ), work( iwork ),
2831 $ lwork-iwork+1, ierr )
2832*
2833* Generate left bidiagonalizing vectors in U
2834* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2835*
2836 CALL sorgbr( 'Q', m, m, m, u, ldu,
2837 $ work( itauq ),
2838 $ work( iwork ), lwork-iwork+1, ierr )
2839 iwork = ie + m
2840*
2841* Perform bidiagonal QR iteration, computing left
2842* singular vectors of L in U and computing right
2843* singular vectors of L in WORK(IU)
2844* (Workspace: need M*M+BDSPAC)
2845*
2846 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2847 $ work( iu ), ldwrku, u, ldu, dum, 1,
2848 $ work( iwork ), info )
2849*
2850* Multiply right singular vectors of L in WORK(IU) by
2851* Q in A, storing result in VT
2852* (Workspace: need M*M)
2853*
2854 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2855 $ ldwrku, a, lda, zero, vt, ldvt )
2856*
2857 ELSE
2858*
2859* Insufficient workspace for a fast algorithm
2860*
2861 itau = 1
2862 iwork = itau + m
2863*
2864* Compute A=L*Q, copying result to VT
2865* (Workspace: need 2*M, prefer M+M*NB)
2866*
2867 CALL sgelqf( m, n, a, lda, work( itau ),
2868 $ work( iwork ), lwork-iwork+1, ierr )
2869 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2870*
2871* Generate Q in VT
2872* (Workspace: need 2*M, prefer M+M*NB)
2873*
2874 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2875 $ work( iwork ), lwork-iwork+1, ierr )
2876*
2877* Copy L to U, zeroing out above it
2878*
2879 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2880 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1,
2881 $ 2 ),
2882 $ ldu )
2883 ie = itau
2884 itauq = ie + m
2885 itaup = itauq + m
2886 iwork = itaup + m
2887*
2888* Bidiagonalize L in U
2889* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2890*
2891 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2892 $ work( itauq ), work( itaup ),
2893 $ work( iwork ), lwork-iwork+1, ierr )
2894*
2895* Multiply right bidiagonalizing vectors in U by Q
2896* in VT
2897* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2898*
2899 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2900 $ work( itaup ), vt, ldvt,
2901 $ work( iwork ), lwork-iwork+1, ierr )
2902*
2903* Generate left bidiagonalizing vectors in U
2904* (Workspace: need 4*M, prefer 3*M+M*NB)
2905*
2906 CALL sorgbr( 'Q', m, m, m, u, ldu,
2907 $ work( itauq ),
2908 $ work( iwork ), lwork-iwork+1, ierr )
2909 iwork = ie + m
2910*
2911* Perform bidiagonal QR iteration, computing left
2912* singular vectors of A in U and computing right
2913* singular vectors of A in VT
2914* (Workspace: need BDSPAC)
2915*
2916 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2917 $ ldvt, u, ldu, dum, 1, work( iwork ),
2918 $ info )
2919*
2920 END IF
2921*
2922 END IF
2923*
2924 ELSE IF( wntva ) THEN
2925*
2926 IF( wntun ) THEN
2927*
2928* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2929* N right singular vectors to be computed in VT and
2930* no left singular vectors to be computed
2931*
2932 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2933*
2934* Sufficient workspace for a fast algorithm
2935*
2936 ir = 1
2937 IF( lwork.GE.wrkbl+lda*m ) THEN
2938*
2939* WORK(IR) is LDA by M
2940*
2941 ldwrkr = lda
2942 ELSE
2943*
2944* WORK(IR) is M by M
2945*
2946 ldwrkr = m
2947 END IF
2948 itau = ir + ldwrkr*m
2949 iwork = itau + m
2950*
2951* Compute A=L*Q, copying result to VT
2952* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2953*
2954 CALL sgelqf( m, n, a, lda, work( itau ),
2955 $ work( iwork ), lwork-iwork+1, ierr )
2956 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2957*
2958* Copy L to WORK(IR), zeroing out above it
2959*
2960 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2961 $ ldwrkr )
2962 CALL slaset( 'U', m-1, m-1, zero, zero,
2963 $ work( ir+ldwrkr ), ldwrkr )
2964*
2965* Generate Q in VT
2966* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2967*
2968 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2969 $ work( iwork ), lwork-iwork+1, ierr )
2970 ie = itau
2971 itauq = ie + m
2972 itaup = itauq + m
2973 iwork = itaup + m
2974*
2975* Bidiagonalize L in WORK(IR)
2976* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2977*
2978 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2979 $ work( ie ), work( itauq ),
2980 $ work( itaup ), work( iwork ),
2981 $ lwork-iwork+1, ierr )
2982*
2983* Generate right bidiagonalizing vectors in WORK(IR)
2984* (Workspace: need M*M+4*M-1,
2985* prefer M*M+3*M+(M-1)*NB)
2986*
2987 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2988 $ work( itaup ), work( iwork ),
2989 $ lwork-iwork+1, ierr )
2990 iwork = ie + m
2991*
2992* Perform bidiagonal QR iteration, computing right
2993* singular vectors of L in WORK(IR)
2994* (Workspace: need M*M+BDSPAC)
2995*
2996 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2997 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2998 $ work( iwork ), info )
2999*
3000* Multiply right singular vectors of L in WORK(IR) by
3001* Q in VT, storing result in A
3002* (Workspace: need M*M)
3003*
3004 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
3005 $ ldwrkr, vt, ldvt, zero, a, lda )
3006*
3007* Copy right singular vectors of A from A to VT
3008*
3009 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3010*
3011 ELSE
3012*
3013* Insufficient workspace for a fast algorithm
3014*
3015 itau = 1
3016 iwork = itau + m
3017*
3018* Compute A=L*Q, copying result to VT
3019* (Workspace: need 2*M, prefer M+M*NB)
3020*
3021 CALL sgelqf( m, n, a, lda, work( itau ),
3022 $ work( iwork ), lwork-iwork+1, ierr )
3023 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3024*
3025* Generate Q in VT
3026* (Workspace: need M+N, prefer M+N*NB)
3027*
3028 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3029 $ work( iwork ), lwork-iwork+1, ierr )
3030 ie = itau
3031 itauq = ie + m
3032 itaup = itauq + m
3033 iwork = itaup + m
3034*
3035* Zero out above L in A
3036*
3037 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1,
3038 $ 2 ),
3039 $ lda )
3040*
3041* Bidiagonalize L in A
3042* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3043*
3044 CALL sgebrd( m, m, a, lda, s, work( ie ),
3045 $ work( itauq ), work( itaup ),
3046 $ work( iwork ), lwork-iwork+1, ierr )
3047*
3048* Multiply right bidiagonalizing vectors in A by Q
3049* in VT
3050* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3051*
3052 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3053 $ work( itaup ), vt, ldvt,
3054 $ work( iwork ), lwork-iwork+1, ierr )
3055 iwork = ie + m
3056*
3057* Perform bidiagonal QR iteration, computing right
3058* singular vectors of A in VT
3059* (Workspace: need BDSPAC)
3060*
3061 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3062 $ ldvt, dum, 1, dum, 1, work( iwork ),
3063 $ info )
3064*
3065 END IF
3066*
3067 ELSE IF( wntuo ) THEN
3068*
3069* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3070* N right singular vectors to be computed in VT and
3071* M left singular vectors to be overwritten on A
3072*
3073 IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3074*
3075* Sufficient workspace for a fast algorithm
3076*
3077 iu = 1
3078 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3079*
3080* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3081*
3082 ldwrku = lda
3083 ir = iu + ldwrku*m
3084 ldwrkr = lda
3085 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3086*
3087* WORK(IU) is LDA by M and WORK(IR) is M by M
3088*
3089 ldwrku = lda
3090 ir = iu + ldwrku*m
3091 ldwrkr = m
3092 ELSE
3093*
3094* WORK(IU) is M by M and WORK(IR) is M by M
3095*
3096 ldwrku = m
3097 ir = iu + ldwrku*m
3098 ldwrkr = m
3099 END IF
3100 itau = ir + ldwrkr*m
3101 iwork = itau + m
3102*
3103* Compute A=L*Q, copying result to VT
3104* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3105*
3106 CALL sgelqf( m, n, a, lda, work( itau ),
3107 $ work( iwork ), lwork-iwork+1, ierr )
3108 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3109*
3110* Generate Q in VT
3111* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3112*
3113 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3114 $ work( iwork ), lwork-iwork+1, ierr )
3115*
3116* Copy L to WORK(IU), zeroing out above it
3117*
3118 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3119 $ ldwrku )
3120 CALL slaset( 'U', m-1, m-1, zero, zero,
3121 $ work( iu+ldwrku ), ldwrku )
3122 ie = itau
3123 itauq = ie + m
3124 itaup = itauq + m
3125 iwork = itaup + m
3126*
3127* Bidiagonalize L in WORK(IU), copying result to
3128* WORK(IR)
3129* (Workspace: need 2*M*M+4*M,
3130* prefer 2*M*M+3*M+2*M*NB)
3131*
3132 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3133 $ work( ie ), work( itauq ),
3134 $ work( itaup ), work( iwork ),
3135 $ lwork-iwork+1, ierr )
3136 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3137 $ work( ir ), ldwrkr )
3138*
3139* Generate right bidiagonalizing vectors in WORK(IU)
3140* (Workspace: need 2*M*M+4*M-1,
3141* prefer 2*M*M+3*M+(M-1)*NB)
3142*
3143 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3144 $ work( itaup ), work( iwork ),
3145 $ lwork-iwork+1, ierr )
3146*
3147* Generate left bidiagonalizing vectors in WORK(IR)
3148* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3149*
3150 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3151 $ work( itauq ), work( iwork ),
3152 $ lwork-iwork+1, ierr )
3153 iwork = ie + m
3154*
3155* Perform bidiagonal QR iteration, computing left
3156* singular vectors of L in WORK(IR) and computing
3157* right singular vectors of L in WORK(IU)
3158* (Workspace: need 2*M*M+BDSPAC)
3159*
3160 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3161 $ work( iu ), ldwrku, work( ir ),
3162 $ ldwrkr, dum, 1, work( iwork ), info )
3163*
3164* Multiply right singular vectors of L in WORK(IU) by
3165* Q in VT, storing result in A
3166* (Workspace: need M*M)
3167*
3168 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3169 $ ldwrku, vt, ldvt, zero, a, lda )
3170*
3171* Copy right singular vectors of A from A to VT
3172*
3173 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3174*
3175* Copy left singular vectors of A from WORK(IR) to A
3176*
3177 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3178 $ lda )
3179*
3180 ELSE
3181*
3182* Insufficient workspace for a fast algorithm
3183*
3184 itau = 1
3185 iwork = itau + m
3186*
3187* Compute A=L*Q, copying result to VT
3188* (Workspace: need 2*M, prefer M+M*NB)
3189*
3190 CALL sgelqf( m, n, a, lda, work( itau ),
3191 $ work( iwork ), lwork-iwork+1, ierr )
3192 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3193*
3194* Generate Q in VT
3195* (Workspace: need M+N, prefer M+N*NB)
3196*
3197 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3198 $ work( iwork ), lwork-iwork+1, ierr )
3199 ie = itau
3200 itauq = ie + m
3201 itaup = itauq + m
3202 iwork = itaup + m
3203*
3204* Zero out above L in A
3205*
3206 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1,
3207 $ 2 ),
3208 $ lda )
3209*
3210* Bidiagonalize L in A
3211* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3212*
3213 CALL sgebrd( m, m, a, lda, s, work( ie ),
3214 $ work( itauq ), work( itaup ),
3215 $ work( iwork ), lwork-iwork+1, ierr )
3216*
3217* Multiply right bidiagonalizing vectors in A by Q
3218* in VT
3219* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3220*
3221 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3222 $ work( itaup ), vt, ldvt,
3223 $ work( iwork ), lwork-iwork+1, ierr )
3224*
3225* Generate left bidiagonalizing vectors in A
3226* (Workspace: need 4*M, prefer 3*M+M*NB)
3227*
3228 CALL sorgbr( 'Q', m, m, m, a, lda,
3229 $ work( itauq ),
3230 $ work( iwork ), lwork-iwork+1, ierr )
3231 iwork = ie + m
3232*
3233* Perform bidiagonal QR iteration, computing left
3234* singular vectors of A in A and computing right
3235* singular vectors of A in VT
3236* (Workspace: need BDSPAC)
3237*
3238 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3239 $ ldvt, a, lda, dum, 1, work( iwork ),
3240 $ info )
3241*
3242 END IF
3243*
3244 ELSE IF( wntuas ) THEN
3245*
3246* Path 9t(N much larger than M, JOBU='S' or 'A',
3247* JOBVT='A')
3248* N right singular vectors to be computed in VT and
3249* M left singular vectors to be computed in U
3250*
3251 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3252*
3253* Sufficient workspace for a fast algorithm
3254*
3255 iu = 1
3256 IF( lwork.GE.wrkbl+lda*m ) THEN
3257*
3258* WORK(IU) is LDA by M
3259*
3260 ldwrku = lda
3261 ELSE
3262*
3263* WORK(IU) is M by M
3264*
3265 ldwrku = m
3266 END IF
3267 itau = iu + ldwrku*m
3268 iwork = itau + m
3269*
3270* Compute A=L*Q, copying result to VT
3271* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3272*
3273 CALL sgelqf( m, n, a, lda, work( itau ),
3274 $ work( iwork ), lwork-iwork+1, ierr )
3275 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3276*
3277* Generate Q in VT
3278* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3279*
3280 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3281 $ work( iwork ), lwork-iwork+1, ierr )
3282*
3283* Copy L to WORK(IU), zeroing out above it
3284*
3285 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3286 $ ldwrku )
3287 CALL slaset( 'U', m-1, m-1, zero, zero,
3288 $ work( iu+ldwrku ), ldwrku )
3289 ie = itau
3290 itauq = ie + m
3291 itaup = itauq + m
3292 iwork = itaup + m
3293*
3294* Bidiagonalize L in WORK(IU), copying result to U
3295* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3296*
3297 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3298 $ work( ie ), work( itauq ),
3299 $ work( itaup ), work( iwork ),
3300 $ lwork-iwork+1, ierr )
3301 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3302 $ ldu )
3303*
3304* Generate right bidiagonalizing vectors in WORK(IU)
3305* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3306*
3307 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3308 $ work( itaup ), work( iwork ),
3309 $ lwork-iwork+1, ierr )
3310*
3311* Generate left bidiagonalizing vectors in U
3312* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3313*
3314 CALL sorgbr( 'Q', m, m, m, u, ldu,
3315 $ work( itauq ),
3316 $ work( iwork ), lwork-iwork+1, ierr )
3317 iwork = ie + m
3318*
3319* Perform bidiagonal QR iteration, computing left
3320* singular vectors of L in U and computing right
3321* singular vectors of L in WORK(IU)
3322* (Workspace: need M*M+BDSPAC)
3323*
3324 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3325 $ work( iu ), ldwrku, u, ldu, dum, 1,
3326 $ work( iwork ), info )
3327*
3328* Multiply right singular vectors of L in WORK(IU) by
3329* Q in VT, storing result in A
3330* (Workspace: need M*M)
3331*
3332 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3333 $ ldwrku, vt, ldvt, zero, a, lda )
3334*
3335* Copy right singular vectors of A from A to VT
3336*
3337 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3338*
3339 ELSE
3340*
3341* Insufficient workspace for a fast algorithm
3342*
3343 itau = 1
3344 iwork = itau + m
3345*
3346* Compute A=L*Q, copying result to VT
3347* (Workspace: need 2*M, prefer M+M*NB)
3348*
3349 CALL sgelqf( m, n, a, lda, work( itau ),
3350 $ work( iwork ), lwork-iwork+1, ierr )
3351 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3352*
3353* Generate Q in VT
3354* (Workspace: need M+N, prefer M+N*NB)
3355*
3356 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3357 $ work( iwork ), lwork-iwork+1, ierr )
3358*
3359* Copy L to U, zeroing out above it
3360*
3361 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3362 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1,
3363 $ 2 ),
3364 $ ldu )
3365 ie = itau
3366 itauq = ie + m
3367 itaup = itauq + m
3368 iwork = itaup + m
3369*
3370* Bidiagonalize L in U
3371* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3372*
3373 CALL sgebrd( m, m, u, ldu, s, work( ie ),
3374 $ work( itauq ), work( itaup ),
3375 $ work( iwork ), lwork-iwork+1, ierr )
3376*
3377* Multiply right bidiagonalizing vectors in U by Q
3378* in VT
3379* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3380*
3381 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3382 $ work( itaup ), vt, ldvt,
3383 $ work( iwork ), lwork-iwork+1, ierr )
3384*
3385* Generate left bidiagonalizing vectors in U
3386* (Workspace: need 4*M, prefer 3*M+M*NB)
3387*
3388 CALL sorgbr( 'Q', m, m, m, u, ldu,
3389 $ work( itauq ),
3390 $ work( iwork ), lwork-iwork+1, ierr )
3391 iwork = ie + m
3392*
3393* Perform bidiagonal QR iteration, computing left
3394* singular vectors of A in U and computing right
3395* singular vectors of A in VT
3396* (Workspace: need BDSPAC)
3397*
3398 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3399 $ ldvt, u, ldu, dum, 1, work( iwork ),
3400 $ info )
3401*
3402 END IF
3403*
3404 END IF
3405*
3406 END IF
3407*
3408 ELSE
3409*
3410* N .LT. MNTHR
3411*
3412* Path 10t(N greater than M, but not much larger)
3413* Reduce to bidiagonal form without LQ decomposition
3414*
3415 ie = 1
3416 itauq = ie + m
3417 itaup = itauq + m
3418 iwork = itaup + m
3419*
3420* Bidiagonalize A
3421* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3422*
3423 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3424 $ work( itaup ), work( iwork ), lwork-iwork+1,
3425 $ ierr )
3426 IF( wntuas ) THEN
3427*
3428* If left singular vectors desired in U, copy result to U
3429* and generate left bidiagonalizing vectors in U
3430* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3431*
3432 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3433 CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3434 $ work( iwork ), lwork-iwork+1, ierr )
3435 END IF
3436 IF( wntvas ) THEN
3437*
3438* If right singular vectors desired in VT, copy result to
3439* VT and generate right bidiagonalizing vectors in VT
3440* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3441*
3442 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3443 IF( wntva )
3444 $ nrvt = n
3445 IF( wntvs )
3446 $ nrvt = m
3447 CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3448 $ work( iwork ), lwork-iwork+1, ierr )
3449 END IF
3450 IF( wntuo ) THEN
3451*
3452* If left singular vectors desired in A, generate left
3453* bidiagonalizing vectors in A
3454* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3455*
3456 CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3457 $ work( iwork ), lwork-iwork+1, ierr )
3458 END IF
3459 IF( wntvo ) THEN
3460*
3461* If right singular vectors desired in A, generate right
3462* bidiagonalizing vectors in A
3463* (Workspace: need 4*M, prefer 3*M+M*NB)
3464*
3465 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3466 $ work( iwork ), lwork-iwork+1, ierr )
3467 END IF
3468 iwork = ie + m
3469 IF( wntuas .OR. wntuo )
3470 $ nru = m
3471 IF( wntun )
3472 $ nru = 0
3473 IF( wntvas .OR. wntvo )
3474 $ ncvt = n
3475 IF( wntvn )
3476 $ ncvt = 0
3477 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3478*
3479* Perform bidiagonal QR iteration, if desired, computing
3480* left singular vectors in U and computing right singular
3481* vectors in VT
3482* (Workspace: need BDSPAC)
3483*
3484 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3485 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3486 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3487*
3488* Perform bidiagonal QR iteration, if desired, computing
3489* left singular vectors in U and computing right singular
3490* vectors in A
3491* (Workspace: need BDSPAC)
3492*
3493 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a,
3494 $ lda,
3495 $ u, ldu, dum, 1, work( iwork ), info )
3496 ELSE
3497*
3498* Perform bidiagonal QR iteration, if desired, computing
3499* left singular vectors in A and computing right singular
3500* vectors in VT
3501* (Workspace: need BDSPAC)
3502*
3503 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3504 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3505 END IF
3506*
3507 END IF
3508*
3509 END IF
3510*
3511* If SBDSQR failed to converge, copy unconverged superdiagonals
3512* to WORK( 2:MINMN )
3513*
3514 IF( info.NE.0 ) THEN
3515 IF( ie.GT.2 ) THEN
3516 DO 50 i = 1, minmn - 1
3517 work( i+1 ) = work( i+ie-1 )
3518 50 CONTINUE
3519 END IF
3520 IF( ie.LT.2 ) THEN
3521 DO 60 i = minmn - 1, 1, -1
3522 work( i+1 ) = work( i+ie-1 )
3523 60 CONTINUE
3524 END IF
3525 END IF
3526*
3527* Undo scaling if necessary
3528*
3529 IF( iscl.EQ.1 ) THEN
3530 IF( anrm.GT.bignum )
3531 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3532 $ ierr )
3533 IF( info.NE.0 .AND. anrm.GT.bignum )
3534 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3535 $ work( 2 ),
3536 $ minmn, ierr )
3537 IF( anrm.LT.smlnum )
3538 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3539 $ ierr )
3540 IF( info.NE.0 .AND. anrm.LT.smlnum )
3541 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3542 $ work( 2 ),
3543 $ minmn, ierr )
3544 END IF
3545*
3546* Return optimal workspace in WORK(1)
3547*
3548 work( 1 ) = sroundup_lwork(maxwrk)
3549*
3550 RETURN
3551*
3552* End of SGESVD
3553*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine sbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
SBDSQR
Definition sbdsqr.f:240
subroutine sgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
SGEBRD
Definition sgebrd.f:204
subroutine sgelqf(m, n, a, lda, tau, work, lwork, info)
SGELQF
Definition sgelqf.f:142
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sgeqrf(m, n, a, lda, tau, work, lwork, info)
SGEQRF
Definition sgeqrf.f:144
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:160
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:101
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
real function slange(norm, m, n, a, lda, work)
SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition slange.f:112
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:142
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:108
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine sorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
SORGBR
Definition sorgbr.f:156
subroutine sorglq(m, n, k, a, lda, tau, work, lwork, info)
SORGLQ
Definition sorglq.f:125
subroutine sorgqr(m, n, k, a, lda, tau, work, lwork, info)
SORGQR
Definition sorgqr.f:126
subroutine sormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
SORMBR
Definition sormbr.f:194
Here is the call graph for this function:
Here is the caller graph for this function: