LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ sgesvd()

subroutine sgesvd ( character  jobu,
character  jobvt,
integer  m,
integer  n,
real, dimension( lda, * )  a,
integer  lda,
real, dimension( * )  s,
real, dimension( ldu, * )  u,
integer  ldu,
real, dimension( ldvt, * )  vt,
integer  ldvt,
real, dimension( * )  work,
integer  lwork,
integer  info 
)

SGESVD computes the singular value decomposition (SVD) for GE matrices

Download SGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 SGESVD computes the singular value decomposition (SVD) of a real
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors. The SVD is written

      A = U * SIGMA * transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns V**T, not V.
Parameters
[in]JOBU
          JOBU is CHARACTER*1
          Specifies options for computing all or part of the matrix U:
          = 'A':  all M columns of U are returned in array U:
          = 'S':  the first min(m,n) columns of U (the left singular
                  vectors) are returned in the array U;
          = 'O':  the first min(m,n) columns of U (the left singular
                  vectors) are overwritten on the array A;
          = 'N':  no columns of U (no left singular vectors) are
                  computed.
[in]JOBVT
          JOBVT is CHARACTER*1
          Specifies options for computing all or part of the matrix
          V**T:
          = 'A':  all N rows of V**T are returned in the array VT;
          = 'S':  the first min(m,n) rows of V**T (the right singular
                  vectors) are returned in the array VT;
          = 'O':  the first min(m,n) rows of V**T (the right singular
                  vectors) are overwritten on the array A;
          = 'N':  no rows of V**T (no right singular vectors) are
                  computed.

          JOBVT and JOBU cannot both be 'O'.
[in]M
          M is INTEGER
          The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.  N >= 0.
[in,out]A
          A is REAL array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          if JOBU = 'O',  A is overwritten with the first min(m,n)
                          columns of U (the left singular vectors,
                          stored columnwise);
          if JOBVT = 'O', A is overwritten with the first min(m,n)
                          rows of V**T (the right singular vectors,
                          stored rowwise);
          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
                          are destroyed.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]S
          S is REAL array, dimension (min(M,N))
          The singular values of A, sorted so that S(i) >= S(i+1).
[out]U
          U is REAL array, dimension (LDU,UCOL)
          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
          if JOBU = 'S', U contains the first min(m,n) columns of U
          (the left singular vectors, stored columnwise);
          if JOBU = 'N' or 'O', U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U.  LDU >= 1; if
          JOBU = 'S' or 'A', LDU >= M.
[out]VT
          VT is REAL array, dimension (LDVT,N)
          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
          V**T;
          if JOBVT = 'S', VT contains the first min(m,n) rows of
          V**T (the right singular vectors, stored rowwise);
          if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVT
          LDVT is INTEGER
          The leading dimension of the array VT.  LDVT >= 1; if
          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[out]WORK
          WORK is REAL array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
          superdiagonal elements of an upper bidiagonal matrix B
          whose diagonal is in S (not necessarily sorted). B
          satisfies A = U * B * VT, so it has the same singular values
          as A, and singular vectors related by U and VT.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK.
          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
             - PATH 1  (M much larger than N, JOBU='N')
             - PATH 1t (N much larger than M, JOBVT='N')
          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
          For good performance, LWORK should generally be larger.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if SBDSQR did not converge, INFO specifies how many
                superdiagonals of an intermediate bidiagonal form B
                did not converge to zero. See the description of WORK
                above for details.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 209 of file sgesvd.f.

211*
212* -- LAPACK driver routine --
213* -- LAPACK is a software package provided by Univ. of Tennessee, --
214* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216* .. Scalar Arguments ..
217 CHARACTER JOBU, JOBVT
218 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219* ..
220* .. Array Arguments ..
221 REAL A( LDA, * ), S( * ), U( LDU, * ),
222 $ VT( LDVT, * ), WORK( * )
223* ..
224*
225* =====================================================================
226*
227* .. Parameters ..
228 REAL ZERO, ONE
229 parameter( zero = 0.0e0, one = 1.0e0 )
230* ..
231* .. Local Scalars ..
232 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
234 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
236 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
237 $ NRVT, WRKBL
238 INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
239 $ LWORK_SGEBRD, LWORK_SORGBR_P, LWORK_SORGBR_Q,
240 $ LWORK_SGELQF, LWORK_SORGLQ_N, LWORK_SORGLQ_M
241 REAL ANRM, BIGNUM, EPS, SMLNUM
242* ..
243* .. Local Arrays ..
244 REAL DUM( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 REAL SLAMCH, SLANGE, SROUNDUP_LWORK
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC max, min, sqrt
259* ..
260* .. Executable Statements ..
261*
262* Test the input arguments
263*
264 info = 0
265 minmn = min( m, n )
266 wntua = lsame( jobu, 'A' )
267 wntus = lsame( jobu, 'S' )
268 wntuas = wntua .OR. wntus
269 wntuo = lsame( jobu, 'O' )
270 wntun = lsame( jobu, 'N' )
271 wntva = lsame( jobvt, 'A' )
272 wntvs = lsame( jobvt, 'S' )
273 wntvas = wntva .OR. wntvs
274 wntvo = lsame( jobvt, 'O' )
275 wntvn = lsame( jobvt, 'N' )
276 lquery = ( lwork.EQ.-1 )
277*
278 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279 info = -1
280 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281 $ ( wntvo .AND. wntuo ) ) THEN
282 info = -2
283 ELSE IF( m.LT.0 ) THEN
284 info = -3
285 ELSE IF( n.LT.0 ) THEN
286 info = -4
287 ELSE IF( lda.LT.max( 1, m ) ) THEN
288 info = -6
289 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290 info = -9
291 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293 info = -11
294 END IF
295*
296* Compute workspace
297* (Note: Comments in the code beginning "Workspace:" describe the
298* minimal amount of workspace needed at that point in the code,
299* as well as the preferred amount for good performance.
300* NB refers to the optimal block size for the immediately
301* following subroutine, as returned by ILAENV.)
302*
303 IF( info.EQ.0 ) THEN
304 minwrk = 1
305 maxwrk = 1
306 IF( m.GE.n .AND. minmn.GT.0 ) THEN
307*
308* Compute space needed for SBDSQR
309*
310 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
311 bdspac = 5*n
312* Compute space needed for SGEQRF
313 CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314 lwork_sgeqrf = int( dum(1) )
315* Compute space needed for SORGQR
316 CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317 lwork_sorgqr_n = int( dum(1) )
318 CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319 lwork_sorgqr_m = int( dum(1) )
320* Compute space needed for SGEBRD
321 CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
322 $ dum(1), dum(1), -1, ierr )
323 lwork_sgebrd = int( dum(1) )
324* Compute space needed for SORGBR P
325 CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
326 $ dum(1), -1, ierr )
327 lwork_sorgbr_p = int( dum(1) )
328* Compute space needed for SORGBR Q
329 CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
330 $ dum(1), -1, ierr )
331 lwork_sorgbr_q = int( dum(1) )
332*
333 IF( m.GE.mnthr ) THEN
334 IF( wntun ) THEN
335*
336* Path 1 (M much larger than N, JOBU='N')
337*
338 maxwrk = n + lwork_sgeqrf
339 maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
340 IF( wntvo .OR. wntvas )
341 $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
342 maxwrk = max( maxwrk, bdspac )
343 minwrk = max( 4*n, bdspac )
344 ELSE IF( wntuo .AND. wntvn ) THEN
345*
346* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348 wrkbl = n + lwork_sgeqrf
349 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
350 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
351 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
352 wrkbl = max( wrkbl, bdspac )
353 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
354 minwrk = max( 3*n+m, bdspac )
355 ELSE IF( wntuo .AND. wntvas ) THEN
356*
357* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358* 'A')
359*
360 wrkbl = n + lwork_sgeqrf
361 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
362 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
363 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
364 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
365 wrkbl = max( wrkbl, bdspac )
366 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
367 minwrk = max( 3*n+m, bdspac )
368 ELSE IF( wntus .AND. wntvn ) THEN
369*
370* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372 wrkbl = n + lwork_sgeqrf
373 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
374 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
375 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
376 wrkbl = max( wrkbl, bdspac )
377 maxwrk = n*n + wrkbl
378 minwrk = max( 3*n+m, bdspac )
379 ELSE IF( wntus .AND. wntvo ) THEN
380*
381* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383 wrkbl = n + lwork_sgeqrf
384 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
385 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
386 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
387 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
388 wrkbl = max( wrkbl, bdspac )
389 maxwrk = 2*n*n + wrkbl
390 minwrk = max( 3*n+m, bdspac )
391 ELSE IF( wntus .AND. wntvas ) THEN
392*
393* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394* 'A')
395*
396 wrkbl = n + lwork_sgeqrf
397 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
398 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
399 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
400 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
401 wrkbl = max( wrkbl, bdspac )
402 maxwrk = n*n + wrkbl
403 minwrk = max( 3*n+m, bdspac )
404 ELSE IF( wntua .AND. wntvn ) THEN
405*
406* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408 wrkbl = n + lwork_sgeqrf
409 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
410 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
411 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
412 wrkbl = max( wrkbl, bdspac )
413 maxwrk = n*n + wrkbl
414 minwrk = max( 3*n+m, bdspac )
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_sgeqrf
420 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
421 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
422 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
423 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
424 wrkbl = max( wrkbl, bdspac )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = max( 3*n+m, bdspac )
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_sgeqrf
433 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
434 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
435 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
436 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
437 wrkbl = max( wrkbl, bdspac )
438 maxwrk = n*n + wrkbl
439 minwrk = max( 3*n+m, bdspac )
440 END IF
441 ELSE
442*
443* Path 10 (M at least N, but not much larger)
444*
445 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
446 $ dum(1), dum(1), -1, ierr )
447 lwork_sgebrd = int( dum(1) )
448 maxwrk = 3*n + lwork_sgebrd
449 IF( wntus .OR. wntuo ) THEN
450 CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
451 $ dum(1), -1, ierr )
452 lwork_sorgbr_q = int( dum(1) )
453 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
454 END IF
455 IF( wntua ) THEN
456 CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
457 $ dum(1), -1, ierr )
458 lwork_sorgbr_q = int( dum(1) )
459 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
460 END IF
461 IF( .NOT.wntvn ) THEN
462 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
463 END IF
464 maxwrk = max( maxwrk, bdspac )
465 minwrk = max( 3*n+m, bdspac )
466 END IF
467 ELSE IF( minmn.GT.0 ) THEN
468*
469* Compute space needed for SBDSQR
470*
471 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
472 bdspac = 5*m
473* Compute space needed for SGELQF
474 CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475 lwork_sgelqf = int( dum(1) )
476* Compute space needed for SORGLQ
477 CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478 lwork_sorglq_n = int( dum(1) )
479 CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_sorglq_m = int( dum(1) )
481* Compute space needed for SGEBRD
482 CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_sgebrd = int( dum(1) )
485* Compute space needed for SORGBR P
486 CALL sorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_sorgbr_p = int( dum(1) )
489* Compute space needed for SORGBR Q
490 CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_sorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_sgelqf
499 maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_sgelqf
509 wrkbl = max( wrkbl, m+lwork_sorglq_m )
510 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
511 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
514 minwrk = max( 3*m+n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_sgelqf
521 wrkbl = max( wrkbl, m+lwork_sorglq_m )
522 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
523 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
524 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
527 minwrk = max( 3*m+n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_sgelqf
533 wrkbl = max( wrkbl, m+lwork_sorglq_m )
534 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
535 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m+n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_sgelqf
544 wrkbl = max( wrkbl, m+lwork_sorglq_m )
545 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
546 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
547 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m+n, bdspac )
551 maxwrk = max( maxwrk, minwrk )
552 ELSE IF( wntvs .AND. wntuas ) THEN
553*
554* Path 6t(N much larger than M, JOBU='S' or 'A',
555* JOBVT='S')
556*
557 wrkbl = m + lwork_sgelqf
558 wrkbl = max( wrkbl, m+lwork_sorglq_m )
559 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
560 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
561 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
562 wrkbl = max( wrkbl, bdspac )
563 maxwrk = m*m + wrkbl
564 minwrk = max( 3*m+n, bdspac )
565 ELSE IF( wntva .AND. wntun ) THEN
566*
567* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
568*
569 wrkbl = m + lwork_sgelqf
570 wrkbl = max( wrkbl, m+lwork_sorglq_n )
571 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
572 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
573 wrkbl = max( wrkbl, bdspac )
574 maxwrk = m*m + wrkbl
575 minwrk = max( 3*m+n, bdspac )
576 ELSE IF( wntva .AND. wntuo ) THEN
577*
578* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
579*
580 wrkbl = m + lwork_sgelqf
581 wrkbl = max( wrkbl, m+lwork_sorglq_n )
582 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
583 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
584 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
585 wrkbl = max( wrkbl, bdspac )
586 maxwrk = 2*m*m + wrkbl
587 minwrk = max( 3*m+n, bdspac )
588 ELSE IF( wntva .AND. wntuas ) THEN
589*
590* Path 9t(N much larger than M, JOBU='S' or 'A',
591* JOBVT='A')
592*
593 wrkbl = m + lwork_sgelqf
594 wrkbl = max( wrkbl, m+lwork_sorglq_n )
595 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
596 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
597 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
598 wrkbl = max( wrkbl, bdspac )
599 maxwrk = m*m + wrkbl
600 minwrk = max( 3*m+n, bdspac )
601 END IF
602 ELSE
603*
604* Path 10t(N greater than M, but not much larger)
605*
606 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
607 $ dum(1), dum(1), -1, ierr )
608 lwork_sgebrd = int( dum(1) )
609 maxwrk = 3*m + lwork_sgebrd
610 IF( wntvs .OR. wntvo ) THEN
611* Compute space needed for SORGBR P
612 CALL sorgbr( 'P', m, n, m, a, n, dum(1),
613 $ dum(1), -1, ierr )
614 lwork_sorgbr_p = int( dum(1) )
615 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
616 END IF
617 IF( wntva ) THEN
618 CALL sorgbr( 'P', n, n, m, a, n, dum(1),
619 $ dum(1), -1, ierr )
620 lwork_sorgbr_p = int( dum(1) )
621 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
622 END IF
623 IF( .NOT.wntun ) THEN
624 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
625 END IF
626 maxwrk = max( maxwrk, bdspac )
627 minwrk = max( 3*m+n, bdspac )
628 END IF
629 END IF
630 maxwrk = max( maxwrk, minwrk )
631 work( 1 ) = sroundup_lwork(maxwrk)
632*
633 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
634 info = -13
635 END IF
636 END IF
637*
638 IF( info.NE.0 ) THEN
639 CALL xerbla( 'SGESVD', -info )
640 RETURN
641 ELSE IF( lquery ) THEN
642 RETURN
643 END IF
644*
645* Quick return if possible
646*
647 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
648 RETURN
649 END IF
650*
651* Get machine constants
652*
653 eps = slamch( 'P' )
654 smlnum = sqrt( slamch( 'S' ) ) / eps
655 bignum = one / smlnum
656*
657* Scale A if max element outside range [SMLNUM,BIGNUM]
658*
659 anrm = slange( 'M', m, n, a, lda, dum )
660 iscl = 0
661 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
662 iscl = 1
663 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
664 ELSE IF( anrm.GT.bignum ) THEN
665 iscl = 1
666 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
667 END IF
668*
669 IF( m.GE.n ) THEN
670*
671* A has at least as many rows as columns. If A has sufficiently
672* more rows than columns, first reduce using the QR
673* decomposition (if sufficient workspace available)
674*
675 IF( m.GE.mnthr ) THEN
676*
677 IF( wntun ) THEN
678*
679* Path 1 (M much larger than N, JOBU='N')
680* No left singular vectors to be computed
681*
682 itau = 1
683 iwork = itau + n
684*
685* Compute A=Q*R
686* (Workspace: need 2*N, prefer N+N*NB)
687*
688 CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
689 $ lwork-iwork+1, ierr )
690*
691* Zero out below R
692*
693 IF( n .GT. 1 ) THEN
694 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
695 $ lda )
696 END IF
697 ie = 1
698 itauq = ie + n
699 itaup = itauq + n
700 iwork = itaup + n
701*
702* Bidiagonalize R in A
703* (Workspace: need 4*N, prefer 3*N+2*N*NB)
704*
705 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706 $ work( itaup ), work( iwork ), lwork-iwork+1,
707 $ ierr )
708 ncvt = 0
709 IF( wntvo .OR. wntvas ) THEN
710*
711* If right singular vectors desired, generate P'.
712* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713*
714 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715 $ work( iwork ), lwork-iwork+1, ierr )
716 ncvt = n
717 END IF
718 iwork = ie + n
719*
720* Perform bidiagonal QR iteration, computing right
721* singular vectors of A in A if desired
722* (Workspace: need BDSPAC)
723*
724 CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725 $ dum, 1, dum, 1, work( iwork ), info )
726*
727* If right singular vectors desired in VT, copy them there
728*
729 IF( wntvas )
730 $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731*
732 ELSE IF( wntuo .AND. wntvn ) THEN
733*
734* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735* N left singular vectors to be overwritten on A and
736* no right singular vectors to be computed
737*
738 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739*
740* Sufficient workspace for a fast algorithm
741*
742 ir = 1
743 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744*
745* WORK(IU) is LDA by N, WORK(IR) is LDA by N
746*
747 ldwrku = lda
748 ldwrkr = lda
749 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750*
751* WORK(IU) is LDA by N, WORK(IR) is N by N
752*
753 ldwrku = lda
754 ldwrkr = n
755 ELSE
756*
757* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758*
759 ldwrku = ( lwork-n*n-n ) / n
760 ldwrkr = n
761 END IF
762 itau = ir + ldwrkr*n
763 iwork = itau + n
764*
765* Compute A=Q*R
766* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767*
768 CALL sgeqrf( m, n, a, lda, work( itau ),
769 $ work( iwork ), lwork-iwork+1, ierr )
770*
771* Copy R to WORK(IR) and zero out below it
772*
773 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774 CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775 $ ldwrkr )
776*
777* Generate Q in A
778* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779*
780 CALL sorgqr( m, n, n, a, lda, work( itau ),
781 $ work( iwork ), lwork-iwork+1, ierr )
782 ie = itau
783 itauq = ie + n
784 itaup = itauq + n
785 iwork = itaup + n
786*
787* Bidiagonalize R in WORK(IR)
788* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789*
790 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791 $ work( itauq ), work( itaup ),
792 $ work( iwork ), lwork-iwork+1, ierr )
793*
794* Generate left vectors bidiagonalizing R
795* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796*
797 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798 $ work( itauq ), work( iwork ),
799 $ lwork-iwork+1, ierr )
800 iwork = ie + n
801*
802* Perform bidiagonal QR iteration, computing left
803* singular vectors of R in WORK(IR)
804* (Workspace: need N*N+BDSPAC)
805*
806 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807 $ work( ir ), ldwrkr, dum, 1,
808 $ work( iwork ), info )
809 iu = ie + n
810*
811* Multiply Q in A by left singular vectors of R in
812* WORK(IR), storing result in WORK(IU) and copying to A
813* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814*
815 DO 10 i = 1, m, ldwrku
816 chunk = min( m-i+1, ldwrku )
817 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818 $ lda, work( ir ), ldwrkr, zero,
819 $ work( iu ), ldwrku )
820 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821 $ a( i, 1 ), lda )
822 10 CONTINUE
823*
824 ELSE
825*
826* Insufficient workspace for a fast algorithm
827*
828 ie = 1
829 itauq = ie + n
830 itaup = itauq + n
831 iwork = itaup + n
832*
833* Bidiagonalize A
834* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835*
836 CALL sgebrd( m, n, a, lda, s, work( ie ),
837 $ work( itauq ), work( itaup ),
838 $ work( iwork ), lwork-iwork+1, ierr )
839*
840* Generate left vectors bidiagonalizing A
841* (Workspace: need 4*N, prefer 3*N+N*NB)
842*
843 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844 $ work( iwork ), lwork-iwork+1, ierr )
845 iwork = ie + n
846*
847* Perform bidiagonal QR iteration, computing left
848* singular vectors of A in A
849* (Workspace: need BDSPAC)
850*
851 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852 $ a, lda, dum, 1, work( iwork ), info )
853*
854 END IF
855*
856 ELSE IF( wntuo .AND. wntvas ) THEN
857*
858* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859* N left singular vectors to be overwritten on A and
860* N right singular vectors to be computed in VT
861*
862 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863*
864* Sufficient workspace for a fast algorithm
865*
866 ir = 1
867 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868*
869* WORK(IU) is LDA by N and WORK(IR) is LDA by N
870*
871 ldwrku = lda
872 ldwrkr = lda
873 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874*
875* WORK(IU) is LDA by N and WORK(IR) is N by N
876*
877 ldwrku = lda
878 ldwrkr = n
879 ELSE
880*
881* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882*
883 ldwrku = ( lwork-n*n-n ) / n
884 ldwrkr = n
885 END IF
886 itau = ir + ldwrkr*n
887 iwork = itau + n
888*
889* Compute A=Q*R
890* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891*
892 CALL sgeqrf( m, n, a, lda, work( itau ),
893 $ work( iwork ), lwork-iwork+1, ierr )
894*
895* Copy R to VT, zeroing out below it
896*
897 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898 IF( n.GT.1 )
899 $ CALL slaset( 'L', n-1, n-1, zero, zero,
900 $ vt( 2, 1 ), ldvt )
901*
902* Generate Q in A
903* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904*
905 CALL sorgqr( m, n, n, a, lda, work( itau ),
906 $ work( iwork ), lwork-iwork+1, ierr )
907 ie = itau
908 itauq = ie + n
909 itaup = itauq + n
910 iwork = itaup + n
911*
912* Bidiagonalize R in VT, copying result to WORK(IR)
913* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914*
915 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916 $ work( itauq ), work( itaup ),
917 $ work( iwork ), lwork-iwork+1, ierr )
918 CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919*
920* Generate left vectors bidiagonalizing R in WORK(IR)
921* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922*
923 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924 $ work( itauq ), work( iwork ),
925 $ lwork-iwork+1, ierr )
926*
927* Generate right vectors bidiagonalizing R in VT
928* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929*
930 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931 $ work( iwork ), lwork-iwork+1, ierr )
932 iwork = ie + n
933*
934* Perform bidiagonal QR iteration, computing left
935* singular vectors of R in WORK(IR) and computing right
936* singular vectors of R in VT
937* (Workspace: need N*N+BDSPAC)
938*
939 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940 $ work( ir ), ldwrkr, dum, 1,
941 $ work( iwork ), info )
942 iu = ie + n
943*
944* Multiply Q in A by left singular vectors of R in
945* WORK(IR), storing result in WORK(IU) and copying to A
946* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947*
948 DO 20 i = 1, m, ldwrku
949 chunk = min( m-i+1, ldwrku )
950 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951 $ lda, work( ir ), ldwrkr, zero,
952 $ work( iu ), ldwrku )
953 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954 $ a( i, 1 ), lda )
955 20 CONTINUE
956*
957 ELSE
958*
959* Insufficient workspace for a fast algorithm
960*
961 itau = 1
962 iwork = itau + n
963*
964* Compute A=Q*R
965* (Workspace: need 2*N, prefer N+N*NB)
966*
967 CALL sgeqrf( m, n, a, lda, work( itau ),
968 $ work( iwork ), lwork-iwork+1, ierr )
969*
970* Copy R to VT, zeroing out below it
971*
972 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973 IF( n.GT.1 )
974 $ CALL slaset( 'L', n-1, n-1, zero, zero,
975 $ vt( 2, 1 ), ldvt )
976*
977* Generate Q in A
978* (Workspace: need 2*N, prefer N+N*NB)
979*
980 CALL sorgqr( m, n, n, a, lda, work( itau ),
981 $ work( iwork ), lwork-iwork+1, ierr )
982 ie = itau
983 itauq = ie + n
984 itaup = itauq + n
985 iwork = itaup + n
986*
987* Bidiagonalize R in VT
988* (Workspace: need 4*N, prefer 3*N+2*N*NB)
989*
990 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991 $ work( itauq ), work( itaup ),
992 $ work( iwork ), lwork-iwork+1, ierr )
993*
994* Multiply Q in A by left vectors bidiagonalizing R
995* (Workspace: need 3*N+M, prefer 3*N+M*NB)
996*
997 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998 $ work( itauq ), a, lda, work( iwork ),
999 $ lwork-iwork+1, ierr )
1000*
1001* Generate right vectors bidiagonalizing R in VT
1002* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003*
1004 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005 $ work( iwork ), lwork-iwork+1, ierr )
1006 iwork = ie + n
1007*
1008* Perform bidiagonal QR iteration, computing left
1009* singular vectors of A in A and computing right
1010* singular vectors of A in VT
1011* (Workspace: need BDSPAC)
1012*
1013 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014 $ a, lda, dum, 1, work( iwork ), info )
1015*
1016 END IF
1017*
1018 ELSE IF( wntus ) THEN
1019*
1020 IF( wntvn ) THEN
1021*
1022* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023* N left singular vectors to be computed in U and
1024* no right singular vectors to be computed
1025*
1026 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027*
1028* Sufficient workspace for a fast algorithm
1029*
1030 ir = 1
1031 IF( lwork.GE.wrkbl+lda*n ) THEN
1032*
1033* WORK(IR) is LDA by N
1034*
1035 ldwrkr = lda
1036 ELSE
1037*
1038* WORK(IR) is N by N
1039*
1040 ldwrkr = n
1041 END IF
1042 itau = ir + ldwrkr*n
1043 iwork = itau + n
1044*
1045* Compute A=Q*R
1046* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047*
1048 CALL sgeqrf( m, n, a, lda, work( itau ),
1049 $ work( iwork ), lwork-iwork+1, ierr )
1050*
1051* Copy R to WORK(IR), zeroing out below it
1052*
1053 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054 $ ldwrkr )
1055 CALL slaset( 'L', n-1, n-1, zero, zero,
1056 $ work( ir+1 ), ldwrkr )
1057*
1058* Generate Q in A
1059* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060*
1061 CALL sorgqr( m, n, n, a, lda, work( itau ),
1062 $ work( iwork ), lwork-iwork+1, ierr )
1063 ie = itau
1064 itauq = ie + n
1065 itaup = itauq + n
1066 iwork = itaup + n
1067*
1068* Bidiagonalize R in WORK(IR)
1069* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070*
1071 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072 $ work( ie ), work( itauq ),
1073 $ work( itaup ), work( iwork ),
1074 $ lwork-iwork+1, ierr )
1075*
1076* Generate left vectors bidiagonalizing R in WORK(IR)
1077* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078*
1079 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080 $ work( itauq ), work( iwork ),
1081 $ lwork-iwork+1, ierr )
1082 iwork = ie + n
1083*
1084* Perform bidiagonal QR iteration, computing left
1085* singular vectors of R in WORK(IR)
1086* (Workspace: need N*N+BDSPAC)
1087*
1088 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089 $ 1, work( ir ), ldwrkr, dum, 1,
1090 $ work( iwork ), info )
1091*
1092* Multiply Q in A by left singular vectors of R in
1093* WORK(IR), storing result in U
1094* (Workspace: need N*N)
1095*
1096 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097 $ work( ir ), ldwrkr, zero, u, ldu )
1098*
1099 ELSE
1100*
1101* Insufficient workspace for a fast algorithm
1102*
1103 itau = 1
1104 iwork = itau + n
1105*
1106* Compute A=Q*R, copying result to U
1107* (Workspace: need 2*N, prefer N+N*NB)
1108*
1109 CALL sgeqrf( m, n, a, lda, work( itau ),
1110 $ work( iwork ), lwork-iwork+1, ierr )
1111 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112*
1113* Generate Q in U
1114* (Workspace: need 2*N, prefer N+N*NB)
1115*
1116 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117 $ work( iwork ), lwork-iwork+1, ierr )
1118 ie = itau
1119 itauq = ie + n
1120 itaup = itauq + n
1121 iwork = itaup + n
1122*
1123* Zero out below R in A
1124*
1125 IF( n .GT. 1 ) THEN
1126 CALL slaset( 'L', n-1, n-1, zero, zero,
1127 $ a( 2, 1 ), lda )
1128 END IF
1129*
1130* Bidiagonalize R in A
1131* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1132*
1133 CALL sgebrd( n, n, a, lda, s, work( ie ),
1134 $ work( itauq ), work( itaup ),
1135 $ work( iwork ), lwork-iwork+1, ierr )
1136*
1137* Multiply Q in U by left vectors bidiagonalizing R
1138* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1139*
1140 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1141 $ work( itauq ), u, ldu, work( iwork ),
1142 $ lwork-iwork+1, ierr )
1143 iwork = ie + n
1144*
1145* Perform bidiagonal QR iteration, computing left
1146* singular vectors of A in U
1147* (Workspace: need BDSPAC)
1148*
1149 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1150 $ 1, u, ldu, dum, 1, work( iwork ),
1151 $ info )
1152*
1153 END IF
1154*
1155 ELSE IF( wntvo ) THEN
1156*
1157* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1158* N left singular vectors to be computed in U and
1159* N right singular vectors to be overwritten on A
1160*
1161 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1162*
1163* Sufficient workspace for a fast algorithm
1164*
1165 iu = 1
1166 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1167*
1168* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1169*
1170 ldwrku = lda
1171 ir = iu + ldwrku*n
1172 ldwrkr = lda
1173 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1174*
1175* WORK(IU) is LDA by N and WORK(IR) is N by N
1176*
1177 ldwrku = lda
1178 ir = iu + ldwrku*n
1179 ldwrkr = n
1180 ELSE
1181*
1182* WORK(IU) is N by N and WORK(IR) is N by N
1183*
1184 ldwrku = n
1185 ir = iu + ldwrku*n
1186 ldwrkr = n
1187 END IF
1188 itau = ir + ldwrkr*n
1189 iwork = itau + n
1190*
1191* Compute A=Q*R
1192* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1193*
1194 CALL sgeqrf( m, n, a, lda, work( itau ),
1195 $ work( iwork ), lwork-iwork+1, ierr )
1196*
1197* Copy R to WORK(IU), zeroing out below it
1198*
1199 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1200 $ ldwrku )
1201 CALL slaset( 'L', n-1, n-1, zero, zero,
1202 $ work( iu+1 ), ldwrku )
1203*
1204* Generate Q in A
1205* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1206*
1207 CALL sorgqr( m, n, n, a, lda, work( itau ),
1208 $ work( iwork ), lwork-iwork+1, ierr )
1209 ie = itau
1210 itauq = ie + n
1211 itaup = itauq + n
1212 iwork = itaup + n
1213*
1214* Bidiagonalize R in WORK(IU), copying result to
1215* WORK(IR)
1216* (Workspace: need 2*N*N+4*N,
1217* prefer 2*N*N+3*N+2*N*NB)
1218*
1219 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1220 $ work( ie ), work( itauq ),
1221 $ work( itaup ), work( iwork ),
1222 $ lwork-iwork+1, ierr )
1223 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1224 $ work( ir ), ldwrkr )
1225*
1226* Generate left bidiagonalizing vectors in WORK(IU)
1227* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1228*
1229 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1230 $ work( itauq ), work( iwork ),
1231 $ lwork-iwork+1, ierr )
1232*
1233* Generate right bidiagonalizing vectors in WORK(IR)
1234* (Workspace: need 2*N*N+4*N-1,
1235* prefer 2*N*N+3*N+(N-1)*NB)
1236*
1237 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1238 $ work( itaup ), work( iwork ),
1239 $ lwork-iwork+1, ierr )
1240 iwork = ie + n
1241*
1242* Perform bidiagonal QR iteration, computing left
1243* singular vectors of R in WORK(IU) and computing
1244* right singular vectors of R in WORK(IR)
1245* (Workspace: need 2*N*N+BDSPAC)
1246*
1247 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1248 $ work( ir ), ldwrkr, work( iu ),
1249 $ ldwrku, dum, 1, work( iwork ), info )
1250*
1251* Multiply Q in A by left singular vectors of R in
1252* WORK(IU), storing result in U
1253* (Workspace: need N*N)
1254*
1255 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1256 $ work( iu ), ldwrku, zero, u, ldu )
1257*
1258* Copy right singular vectors of R to A
1259* (Workspace: need N*N)
1260*
1261 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1262 $ lda )
1263*
1264 ELSE
1265*
1266* Insufficient workspace for a fast algorithm
1267*
1268 itau = 1
1269 iwork = itau + n
1270*
1271* Compute A=Q*R, copying result to U
1272* (Workspace: need 2*N, prefer N+N*NB)
1273*
1274 CALL sgeqrf( m, n, a, lda, work( itau ),
1275 $ work( iwork ), lwork-iwork+1, ierr )
1276 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1277*
1278* Generate Q in U
1279* (Workspace: need 2*N, prefer N+N*NB)
1280*
1281 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1282 $ work( iwork ), lwork-iwork+1, ierr )
1283 ie = itau
1284 itauq = ie + n
1285 itaup = itauq + n
1286 iwork = itaup + n
1287*
1288* Zero out below R in A
1289*
1290 IF( n .GT. 1 ) THEN
1291 CALL slaset( 'L', n-1, n-1, zero, zero,
1292 $ a( 2, 1 ), lda )
1293 END IF
1294*
1295* Bidiagonalize R in A
1296* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1297*
1298 CALL sgebrd( n, n, a, lda, s, work( ie ),
1299 $ work( itauq ), work( itaup ),
1300 $ work( iwork ), lwork-iwork+1, ierr )
1301*
1302* Multiply Q in U by left vectors bidiagonalizing R
1303* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1304*
1305 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1306 $ work( itauq ), u, ldu, work( iwork ),
1307 $ lwork-iwork+1, ierr )
1308*
1309* Generate right vectors bidiagonalizing R in A
1310* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1311*
1312 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1313 $ work( iwork ), lwork-iwork+1, ierr )
1314 iwork = ie + n
1315*
1316* Perform bidiagonal QR iteration, computing left
1317* singular vectors of A in U and computing right
1318* singular vectors of A in A
1319* (Workspace: need BDSPAC)
1320*
1321 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1322 $ lda, u, ldu, dum, 1, work( iwork ),
1323 $ info )
1324*
1325 END IF
1326*
1327 ELSE IF( wntvas ) THEN
1328*
1329* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1330* or 'A')
1331* N left singular vectors to be computed in U and
1332* N right singular vectors to be computed in VT
1333*
1334 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1335*
1336* Sufficient workspace for a fast algorithm
1337*
1338 iu = 1
1339 IF( lwork.GE.wrkbl+lda*n ) THEN
1340*
1341* WORK(IU) is LDA by N
1342*
1343 ldwrku = lda
1344 ELSE
1345*
1346* WORK(IU) is N by N
1347*
1348 ldwrku = n
1349 END IF
1350 itau = iu + ldwrku*n
1351 iwork = itau + n
1352*
1353* Compute A=Q*R
1354* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1355*
1356 CALL sgeqrf( m, n, a, lda, work( itau ),
1357 $ work( iwork ), lwork-iwork+1, ierr )
1358*
1359* Copy R to WORK(IU), zeroing out below it
1360*
1361 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1362 $ ldwrku )
1363 CALL slaset( 'L', n-1, n-1, zero, zero,
1364 $ work( iu+1 ), ldwrku )
1365*
1366* Generate Q in A
1367* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1368*
1369 CALL sorgqr( m, n, n, a, lda, work( itau ),
1370 $ work( iwork ), lwork-iwork+1, ierr )
1371 ie = itau
1372 itauq = ie + n
1373 itaup = itauq + n
1374 iwork = itaup + n
1375*
1376* Bidiagonalize R in WORK(IU), copying result to VT
1377* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1378*
1379 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1380 $ work( ie ), work( itauq ),
1381 $ work( itaup ), work( iwork ),
1382 $ lwork-iwork+1, ierr )
1383 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1384 $ ldvt )
1385*
1386* Generate left bidiagonalizing vectors in WORK(IU)
1387* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1388*
1389 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1390 $ work( itauq ), work( iwork ),
1391 $ lwork-iwork+1, ierr )
1392*
1393* Generate right bidiagonalizing vectors in VT
1394* (Workspace: need N*N+4*N-1,
1395* prefer N*N+3*N+(N-1)*NB)
1396*
1397 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1398 $ work( iwork ), lwork-iwork+1, ierr )
1399 iwork = ie + n
1400*
1401* Perform bidiagonal QR iteration, computing left
1402* singular vectors of R in WORK(IU) and computing
1403* right singular vectors of R in VT
1404* (Workspace: need N*N+BDSPAC)
1405*
1406 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1407 $ ldvt, work( iu ), ldwrku, dum, 1,
1408 $ work( iwork ), info )
1409*
1410* Multiply Q in A by left singular vectors of R in
1411* WORK(IU), storing result in U
1412* (Workspace: need N*N)
1413*
1414 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1415 $ work( iu ), ldwrku, zero, u, ldu )
1416*
1417 ELSE
1418*
1419* Insufficient workspace for a fast algorithm
1420*
1421 itau = 1
1422 iwork = itau + n
1423*
1424* Compute A=Q*R, copying result to U
1425* (Workspace: need 2*N, prefer N+N*NB)
1426*
1427 CALL sgeqrf( m, n, a, lda, work( itau ),
1428 $ work( iwork ), lwork-iwork+1, ierr )
1429 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1430*
1431* Generate Q in U
1432* (Workspace: need 2*N, prefer N+N*NB)
1433*
1434 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1435 $ work( iwork ), lwork-iwork+1, ierr )
1436*
1437* Copy R to VT, zeroing out below it
1438*
1439 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1440 IF( n.GT.1 )
1441 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1442 $ vt( 2, 1 ), ldvt )
1443 ie = itau
1444 itauq = ie + n
1445 itaup = itauq + n
1446 iwork = itaup + n
1447*
1448* Bidiagonalize R in VT
1449* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1450*
1451 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1452 $ work( itauq ), work( itaup ),
1453 $ work( iwork ), lwork-iwork+1, ierr )
1454*
1455* Multiply Q in U by left bidiagonalizing vectors
1456* in VT
1457* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1458*
1459 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1460 $ work( itauq ), u, ldu, work( iwork ),
1461 $ lwork-iwork+1, ierr )
1462*
1463* Generate right bidiagonalizing vectors in VT
1464* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1465*
1466 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1467 $ work( iwork ), lwork-iwork+1, ierr )
1468 iwork = ie + n
1469*
1470* Perform bidiagonal QR iteration, computing left
1471* singular vectors of A in U and computing right
1472* singular vectors of A in VT
1473* (Workspace: need BDSPAC)
1474*
1475 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1476 $ ldvt, u, ldu, dum, 1, work( iwork ),
1477 $ info )
1478*
1479 END IF
1480*
1481 END IF
1482*
1483 ELSE IF( wntua ) THEN
1484*
1485 IF( wntvn ) THEN
1486*
1487* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1488* M left singular vectors to be computed in U and
1489* no right singular vectors to be computed
1490*
1491 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1492*
1493* Sufficient workspace for a fast algorithm
1494*
1495 ir = 1
1496 IF( lwork.GE.wrkbl+lda*n ) THEN
1497*
1498* WORK(IR) is LDA by N
1499*
1500 ldwrkr = lda
1501 ELSE
1502*
1503* WORK(IR) is N by N
1504*
1505 ldwrkr = n
1506 END IF
1507 itau = ir + ldwrkr*n
1508 iwork = itau + n
1509*
1510* Compute A=Q*R, copying result to U
1511* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1512*
1513 CALL sgeqrf( m, n, a, lda, work( itau ),
1514 $ work( iwork ), lwork-iwork+1, ierr )
1515 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1516*
1517* Copy R to WORK(IR), zeroing out below it
1518*
1519 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1520 $ ldwrkr )
1521 CALL slaset( 'L', n-1, n-1, zero, zero,
1522 $ work( ir+1 ), ldwrkr )
1523*
1524* Generate Q in U
1525* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1526*
1527 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1528 $ work( iwork ), lwork-iwork+1, ierr )
1529 ie = itau
1530 itauq = ie + n
1531 itaup = itauq + n
1532 iwork = itaup + n
1533*
1534* Bidiagonalize R in WORK(IR)
1535* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1536*
1537 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1538 $ work( ie ), work( itauq ),
1539 $ work( itaup ), work( iwork ),
1540 $ lwork-iwork+1, ierr )
1541*
1542* Generate left bidiagonalizing vectors in WORK(IR)
1543* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1544*
1545 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1546 $ work( itauq ), work( iwork ),
1547 $ lwork-iwork+1, ierr )
1548 iwork = ie + n
1549*
1550* Perform bidiagonal QR iteration, computing left
1551* singular vectors of R in WORK(IR)
1552* (Workspace: need N*N+BDSPAC)
1553*
1554 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1555 $ 1, work( ir ), ldwrkr, dum, 1,
1556 $ work( iwork ), info )
1557*
1558* Multiply Q in U by left singular vectors of R in
1559* WORK(IR), storing result in A
1560* (Workspace: need N*N)
1561*
1562 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1563 $ work( ir ), ldwrkr, zero, a, lda )
1564*
1565* Copy left singular vectors of A from A to U
1566*
1567 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1568*
1569 ELSE
1570*
1571* Insufficient workspace for a fast algorithm
1572*
1573 itau = 1
1574 iwork = itau + n
1575*
1576* Compute A=Q*R, copying result to U
1577* (Workspace: need 2*N, prefer N+N*NB)
1578*
1579 CALL sgeqrf( m, n, a, lda, work( itau ),
1580 $ work( iwork ), lwork-iwork+1, ierr )
1581 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1582*
1583* Generate Q in U
1584* (Workspace: need N+M, prefer N+M*NB)
1585*
1586 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1587 $ work( iwork ), lwork-iwork+1, ierr )
1588 ie = itau
1589 itauq = ie + n
1590 itaup = itauq + n
1591 iwork = itaup + n
1592*
1593* Zero out below R in A
1594*
1595 IF( n .GT. 1 ) THEN
1596 CALL slaset( 'L', n-1, n-1, zero, zero,
1597 $ a( 2, 1 ), lda )
1598 END IF
1599*
1600* Bidiagonalize R in A
1601* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1602*
1603 CALL sgebrd( n, n, a, lda, s, work( ie ),
1604 $ work( itauq ), work( itaup ),
1605 $ work( iwork ), lwork-iwork+1, ierr )
1606*
1607* Multiply Q in U by left bidiagonalizing vectors
1608* in A
1609* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1610*
1611 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1612 $ work( itauq ), u, ldu, work( iwork ),
1613 $ lwork-iwork+1, ierr )
1614 iwork = ie + n
1615*
1616* Perform bidiagonal QR iteration, computing left
1617* singular vectors of A in U
1618* (Workspace: need BDSPAC)
1619*
1620 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1621 $ 1, u, ldu, dum, 1, work( iwork ),
1622 $ info )
1623*
1624 END IF
1625*
1626 ELSE IF( wntvo ) THEN
1627*
1628* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1629* M left singular vectors to be computed in U and
1630* N right singular vectors to be overwritten on A
1631*
1632 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1633*
1634* Sufficient workspace for a fast algorithm
1635*
1636 iu = 1
1637 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1638*
1639* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1640*
1641 ldwrku = lda
1642 ir = iu + ldwrku*n
1643 ldwrkr = lda
1644 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1645*
1646* WORK(IU) is LDA by N and WORK(IR) is N by N
1647*
1648 ldwrku = lda
1649 ir = iu + ldwrku*n
1650 ldwrkr = n
1651 ELSE
1652*
1653* WORK(IU) is N by N and WORK(IR) is N by N
1654*
1655 ldwrku = n
1656 ir = iu + ldwrku*n
1657 ldwrkr = n
1658 END IF
1659 itau = ir + ldwrkr*n
1660 iwork = itau + n
1661*
1662* Compute A=Q*R, copying result to U
1663* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1664*
1665 CALL sgeqrf( m, n, a, lda, work( itau ),
1666 $ work( iwork ), lwork-iwork+1, ierr )
1667 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1668*
1669* Generate Q in U
1670* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1671*
1672 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1673 $ work( iwork ), lwork-iwork+1, ierr )
1674*
1675* Copy R to WORK(IU), zeroing out below it
1676*
1677 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1678 $ ldwrku )
1679 CALL slaset( 'L', n-1, n-1, zero, zero,
1680 $ work( iu+1 ), ldwrku )
1681 ie = itau
1682 itauq = ie + n
1683 itaup = itauq + n
1684 iwork = itaup + n
1685*
1686* Bidiagonalize R in WORK(IU), copying result to
1687* WORK(IR)
1688* (Workspace: need 2*N*N+4*N,
1689* prefer 2*N*N+3*N+2*N*NB)
1690*
1691 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1692 $ work( ie ), work( itauq ),
1693 $ work( itaup ), work( iwork ),
1694 $ lwork-iwork+1, ierr )
1695 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1696 $ work( ir ), ldwrkr )
1697*
1698* Generate left bidiagonalizing vectors in WORK(IU)
1699* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1700*
1701 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1702 $ work( itauq ), work( iwork ),
1703 $ lwork-iwork+1, ierr )
1704*
1705* Generate right bidiagonalizing vectors in WORK(IR)
1706* (Workspace: need 2*N*N+4*N-1,
1707* prefer 2*N*N+3*N+(N-1)*NB)
1708*
1709 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1710 $ work( itaup ), work( iwork ),
1711 $ lwork-iwork+1, ierr )
1712 iwork = ie + n
1713*
1714* Perform bidiagonal QR iteration, computing left
1715* singular vectors of R in WORK(IU) and computing
1716* right singular vectors of R in WORK(IR)
1717* (Workspace: need 2*N*N+BDSPAC)
1718*
1719 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1720 $ work( ir ), ldwrkr, work( iu ),
1721 $ ldwrku, dum, 1, work( iwork ), info )
1722*
1723* Multiply Q in U by left singular vectors of R in
1724* WORK(IU), storing result in A
1725* (Workspace: need N*N)
1726*
1727 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1728 $ work( iu ), ldwrku, zero, a, lda )
1729*
1730* Copy left singular vectors of A from A to U
1731*
1732 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1733*
1734* Copy right singular vectors of R from WORK(IR) to A
1735*
1736 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1737 $ lda )
1738*
1739 ELSE
1740*
1741* Insufficient workspace for a fast algorithm
1742*
1743 itau = 1
1744 iwork = itau + n
1745*
1746* Compute A=Q*R, copying result to U
1747* (Workspace: need 2*N, prefer N+N*NB)
1748*
1749 CALL sgeqrf( m, n, a, lda, work( itau ),
1750 $ work( iwork ), lwork-iwork+1, ierr )
1751 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1752*
1753* Generate Q in U
1754* (Workspace: need N+M, prefer N+M*NB)
1755*
1756 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1757 $ work( iwork ), lwork-iwork+1, ierr )
1758 ie = itau
1759 itauq = ie + n
1760 itaup = itauq + n
1761 iwork = itaup + n
1762*
1763* Zero out below R in A
1764*
1765 IF( n .GT. 1 ) THEN
1766 CALL slaset( 'L', n-1, n-1, zero, zero,
1767 $ a( 2, 1 ), lda )
1768 END IF
1769*
1770* Bidiagonalize R in A
1771* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1772*
1773 CALL sgebrd( n, n, a, lda, s, work( ie ),
1774 $ work( itauq ), work( itaup ),
1775 $ work( iwork ), lwork-iwork+1, ierr )
1776*
1777* Multiply Q in U by left bidiagonalizing vectors
1778* in A
1779* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1780*
1781 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1782 $ work( itauq ), u, ldu, work( iwork ),
1783 $ lwork-iwork+1, ierr )
1784*
1785* Generate right bidiagonalizing vectors in A
1786* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1787*
1788 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1789 $ work( iwork ), lwork-iwork+1, ierr )
1790 iwork = ie + n
1791*
1792* Perform bidiagonal QR iteration, computing left
1793* singular vectors of A in U and computing right
1794* singular vectors of A in A
1795* (Workspace: need BDSPAC)
1796*
1797 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1798 $ lda, u, ldu, dum, 1, work( iwork ),
1799 $ info )
1800*
1801 END IF
1802*
1803 ELSE IF( wntvas ) THEN
1804*
1805* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1806* or 'A')
1807* M left singular vectors to be computed in U and
1808* N right singular vectors to be computed in VT
1809*
1810 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1811*
1812* Sufficient workspace for a fast algorithm
1813*
1814 iu = 1
1815 IF( lwork.GE.wrkbl+lda*n ) THEN
1816*
1817* WORK(IU) is LDA by N
1818*
1819 ldwrku = lda
1820 ELSE
1821*
1822* WORK(IU) is N by N
1823*
1824 ldwrku = n
1825 END IF
1826 itau = iu + ldwrku*n
1827 iwork = itau + n
1828*
1829* Compute A=Q*R, copying result to U
1830* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1831*
1832 CALL sgeqrf( m, n, a, lda, work( itau ),
1833 $ work( iwork ), lwork-iwork+1, ierr )
1834 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1835*
1836* Generate Q in U
1837* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1838*
1839 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1840 $ work( iwork ), lwork-iwork+1, ierr )
1841*
1842* Copy R to WORK(IU), zeroing out below it
1843*
1844 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1845 $ ldwrku )
1846 CALL slaset( 'L', n-1, n-1, zero, zero,
1847 $ work( iu+1 ), ldwrku )
1848 ie = itau
1849 itauq = ie + n
1850 itaup = itauq + n
1851 iwork = itaup + n
1852*
1853* Bidiagonalize R in WORK(IU), copying result to VT
1854* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1855*
1856 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1857 $ work( ie ), work( itauq ),
1858 $ work( itaup ), work( iwork ),
1859 $ lwork-iwork+1, ierr )
1860 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1861 $ ldvt )
1862*
1863* Generate left bidiagonalizing vectors in WORK(IU)
1864* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1865*
1866 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1867 $ work( itauq ), work( iwork ),
1868 $ lwork-iwork+1, ierr )
1869*
1870* Generate right bidiagonalizing vectors in VT
1871* (Workspace: need N*N+4*N-1,
1872* prefer N*N+3*N+(N-1)*NB)
1873*
1874 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1875 $ work( iwork ), lwork-iwork+1, ierr )
1876 iwork = ie + n
1877*
1878* Perform bidiagonal QR iteration, computing left
1879* singular vectors of R in WORK(IU) and computing
1880* right singular vectors of R in VT
1881* (Workspace: need N*N+BDSPAC)
1882*
1883 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1884 $ ldvt, work( iu ), ldwrku, dum, 1,
1885 $ work( iwork ), info )
1886*
1887* Multiply Q in U by left singular vectors of R in
1888* WORK(IU), storing result in A
1889* (Workspace: need N*N)
1890*
1891 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1892 $ work( iu ), ldwrku, zero, a, lda )
1893*
1894* Copy left singular vectors of A from A to U
1895*
1896 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1897*
1898 ELSE
1899*
1900* Insufficient workspace for a fast algorithm
1901*
1902 itau = 1
1903 iwork = itau + n
1904*
1905* Compute A=Q*R, copying result to U
1906* (Workspace: need 2*N, prefer N+N*NB)
1907*
1908 CALL sgeqrf( m, n, a, lda, work( itau ),
1909 $ work( iwork ), lwork-iwork+1, ierr )
1910 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1911*
1912* Generate Q in U
1913* (Workspace: need N+M, prefer N+M*NB)
1914*
1915 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1916 $ work( iwork ), lwork-iwork+1, ierr )
1917*
1918* Copy R from A to VT, zeroing out below it
1919*
1920 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1921 IF( n.GT.1 )
1922 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1923 $ vt( 2, 1 ), ldvt )
1924 ie = itau
1925 itauq = ie + n
1926 itaup = itauq + n
1927 iwork = itaup + n
1928*
1929* Bidiagonalize R in VT
1930* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1931*
1932 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1933 $ work( itauq ), work( itaup ),
1934 $ work( iwork ), lwork-iwork+1, ierr )
1935*
1936* Multiply Q in U by left bidiagonalizing vectors
1937* in VT
1938* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1939*
1940 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1941 $ work( itauq ), u, ldu, work( iwork ),
1942 $ lwork-iwork+1, ierr )
1943*
1944* Generate right bidiagonalizing vectors in VT
1945* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1946*
1947 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1948 $ work( iwork ), lwork-iwork+1, ierr )
1949 iwork = ie + n
1950*
1951* Perform bidiagonal QR iteration, computing left
1952* singular vectors of A in U and computing right
1953* singular vectors of A in VT
1954* (Workspace: need BDSPAC)
1955*
1956 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1957 $ ldvt, u, ldu, dum, 1, work( iwork ),
1958 $ info )
1959*
1960 END IF
1961*
1962 END IF
1963*
1964 END IF
1965*
1966 ELSE
1967*
1968* M .LT. MNTHR
1969*
1970* Path 10 (M at least N, but not much larger)
1971* Reduce to bidiagonal form without QR decomposition
1972*
1973 ie = 1
1974 itauq = ie + n
1975 itaup = itauq + n
1976 iwork = itaup + n
1977*
1978* Bidiagonalize A
1979* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1980*
1981 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1982 $ work( itaup ), work( iwork ), lwork-iwork+1,
1983 $ ierr )
1984 IF( wntuas ) THEN
1985*
1986* If left singular vectors desired in U, copy result to U
1987* and generate left bidiagonalizing vectors in U
1988* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1989*
1990 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1991 IF( wntus )
1992 $ ncu = n
1993 IF( wntua )
1994 $ ncu = m
1995 CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1996 $ work( iwork ), lwork-iwork+1, ierr )
1997 END IF
1998 IF( wntvas ) THEN
1999*
2000* If right singular vectors desired in VT, copy result to
2001* VT and generate right bidiagonalizing vectors in VT
2002* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2003*
2004 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2005 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2006 $ work( iwork ), lwork-iwork+1, ierr )
2007 END IF
2008 IF( wntuo ) THEN
2009*
2010* If left singular vectors desired in A, generate left
2011* bidiagonalizing vectors in A
2012* (Workspace: need 4*N, prefer 3*N+N*NB)
2013*
2014 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2015 $ work( iwork ), lwork-iwork+1, ierr )
2016 END IF
2017 IF( wntvo ) THEN
2018*
2019* If right singular vectors desired in A, generate right
2020* bidiagonalizing vectors in A
2021* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2022*
2023 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2024 $ work( iwork ), lwork-iwork+1, ierr )
2025 END IF
2026 iwork = ie + n
2027 IF( wntuas .OR. wntuo )
2028 $ nru = m
2029 IF( wntun )
2030 $ nru = 0
2031 IF( wntvas .OR. wntvo )
2032 $ ncvt = n
2033 IF( wntvn )
2034 $ ncvt = 0
2035 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2036*
2037* Perform bidiagonal QR iteration, if desired, computing
2038* left singular vectors in U and computing right singular
2039* vectors in VT
2040* (Workspace: need BDSPAC)
2041*
2042 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2043 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2044 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2045*
2046* Perform bidiagonal QR iteration, if desired, computing
2047* left singular vectors in U and computing right singular
2048* vectors in A
2049* (Workspace: need BDSPAC)
2050*
2051 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2052 $ u, ldu, dum, 1, work( iwork ), info )
2053 ELSE
2054*
2055* Perform bidiagonal QR iteration, if desired, computing
2056* left singular vectors in A and computing right singular
2057* vectors in VT
2058* (Workspace: need BDSPAC)
2059*
2060 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2061 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2062 END IF
2063*
2064 END IF
2065*
2066 ELSE
2067*
2068* A has more columns than rows. If A has sufficiently more
2069* columns than rows, first reduce using the LQ decomposition (if
2070* sufficient workspace available)
2071*
2072 IF( n.GE.mnthr ) THEN
2073*
2074 IF( wntvn ) THEN
2075*
2076* Path 1t(N much larger than M, JOBVT='N')
2077* No right singular vectors to be computed
2078*
2079 itau = 1
2080 iwork = itau + m
2081*
2082* Compute A=L*Q
2083* (Workspace: need 2*M, prefer M+M*NB)
2084*
2085 CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2086 $ lwork-iwork+1, ierr )
2087*
2088* Zero out above L
2089*
2090 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2091 ie = 1
2092 itauq = ie + m
2093 itaup = itauq + m
2094 iwork = itaup + m
2095*
2096* Bidiagonalize L in A
2097* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2098*
2099 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2100 $ work( itaup ), work( iwork ), lwork-iwork+1,
2101 $ ierr )
2102 IF( wntuo .OR. wntuas ) THEN
2103*
2104* If left singular vectors desired, generate Q
2105* (Workspace: need 4*M, prefer 3*M+M*NB)
2106*
2107 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2108 $ work( iwork ), lwork-iwork+1, ierr )
2109 END IF
2110 iwork = ie + m
2111 nru = 0
2112 IF( wntuo .OR. wntuas )
2113 $ nru = m
2114*
2115* Perform bidiagonal QR iteration, computing left singular
2116* vectors of A in A if desired
2117* (Workspace: need BDSPAC)
2118*
2119 CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2120 $ lda, dum, 1, work( iwork ), info )
2121*
2122* If left singular vectors desired in U, copy them there
2123*
2124 IF( wntuas )
2125 $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2126*
2127 ELSE IF( wntvo .AND. wntun ) THEN
2128*
2129* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2130* M right singular vectors to be overwritten on A and
2131* no left singular vectors to be computed
2132*
2133 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2134*
2135* Sufficient workspace for a fast algorithm
2136*
2137 ir = 1
2138 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2139*
2140* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2141*
2142 ldwrku = lda
2143 chunk = n
2144 ldwrkr = lda
2145 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2146*
2147* WORK(IU) is LDA by N and WORK(IR) is M by M
2148*
2149 ldwrku = lda
2150 chunk = n
2151 ldwrkr = m
2152 ELSE
2153*
2154* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2155*
2156 ldwrku = m
2157 chunk = ( lwork-m*m-m ) / m
2158 ldwrkr = m
2159 END IF
2160 itau = ir + ldwrkr*m
2161 iwork = itau + m
2162*
2163* Compute A=L*Q
2164* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2165*
2166 CALL sgelqf( m, n, a, lda, work( itau ),
2167 $ work( iwork ), lwork-iwork+1, ierr )
2168*
2169* Copy L to WORK(IR) and zero out above it
2170*
2171 CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2172 CALL slaset( 'U', m-1, m-1, zero, zero,
2173 $ work( ir+ldwrkr ), ldwrkr )
2174*
2175* Generate Q in A
2176* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2177*
2178 CALL sorglq( m, n, m, a, lda, work( itau ),
2179 $ work( iwork ), lwork-iwork+1, ierr )
2180 ie = itau
2181 itauq = ie + m
2182 itaup = itauq + m
2183 iwork = itaup + m
2184*
2185* Bidiagonalize L in WORK(IR)
2186* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2187*
2188 CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2189 $ work( itauq ), work( itaup ),
2190 $ work( iwork ), lwork-iwork+1, ierr )
2191*
2192* Generate right vectors bidiagonalizing L
2193* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2194*
2195 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2196 $ work( itaup ), work( iwork ),
2197 $ lwork-iwork+1, ierr )
2198 iwork = ie + m
2199*
2200* Perform bidiagonal QR iteration, computing right
2201* singular vectors of L in WORK(IR)
2202* (Workspace: need M*M+BDSPAC)
2203*
2204 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2205 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2206 $ work( iwork ), info )
2207 iu = ie + m
2208*
2209* Multiply right singular vectors of L in WORK(IR) by Q
2210* in A, storing result in WORK(IU) and copying to A
2211* (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2212*
2213 DO 30 i = 1, n, chunk
2214 blk = min( n-i+1, chunk )
2215 CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2216 $ ldwrkr, a( 1, i ), lda, zero,
2217 $ work( iu ), ldwrku )
2218 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2219 $ a( 1, i ), lda )
2220 30 CONTINUE
2221*
2222 ELSE
2223*
2224* Insufficient workspace for a fast algorithm
2225*
2226 ie = 1
2227 itauq = ie + m
2228 itaup = itauq + m
2229 iwork = itaup + m
2230*
2231* Bidiagonalize A
2232* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2233*
2234 CALL sgebrd( m, n, a, lda, s, work( ie ),
2235 $ work( itauq ), work( itaup ),
2236 $ work( iwork ), lwork-iwork+1, ierr )
2237*
2238* Generate right vectors bidiagonalizing A
2239* (Workspace: need 4*M, prefer 3*M+M*NB)
2240*
2241 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2242 $ work( iwork ), lwork-iwork+1, ierr )
2243 iwork = ie + m
2244*
2245* Perform bidiagonal QR iteration, computing right
2246* singular vectors of A in A
2247* (Workspace: need BDSPAC)
2248*
2249 CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2250 $ dum, 1, dum, 1, work( iwork ), info )
2251*
2252 END IF
2253*
2254 ELSE IF( wntvo .AND. wntuas ) THEN
2255*
2256* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2257* M right singular vectors to be overwritten on A and
2258* M left singular vectors to be computed in U
2259*
2260 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2261*
2262* Sufficient workspace for a fast algorithm
2263*
2264 ir = 1
2265 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2266*
2267* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2268*
2269 ldwrku = lda
2270 chunk = n
2271 ldwrkr = lda
2272 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2273*
2274* WORK(IU) is LDA by N and WORK(IR) is M by M
2275*
2276 ldwrku = lda
2277 chunk = n
2278 ldwrkr = m
2279 ELSE
2280*
2281* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2282*
2283 ldwrku = m
2284 chunk = ( lwork-m*m-m ) / m
2285 ldwrkr = m
2286 END IF
2287 itau = ir + ldwrkr*m
2288 iwork = itau + m
2289*
2290* Compute A=L*Q
2291* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2292*
2293 CALL sgelqf( m, n, a, lda, work( itau ),
2294 $ work( iwork ), lwork-iwork+1, ierr )
2295*
2296* Copy L to U, zeroing about above it
2297*
2298 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2299 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2300 $ ldu )
2301*
2302* Generate Q in A
2303* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2304*
2305 CALL sorglq( m, n, m, a, lda, work( itau ),
2306 $ work( iwork ), lwork-iwork+1, ierr )
2307 ie = itau
2308 itauq = ie + m
2309 itaup = itauq + m
2310 iwork = itaup + m
2311*
2312* Bidiagonalize L in U, copying result to WORK(IR)
2313* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2314*
2315 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2316 $ work( itauq ), work( itaup ),
2317 $ work( iwork ), lwork-iwork+1, ierr )
2318 CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2319*
2320* Generate right vectors bidiagonalizing L in WORK(IR)
2321* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2322*
2323 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2324 $ work( itaup ), work( iwork ),
2325 $ lwork-iwork+1, ierr )
2326*
2327* Generate left vectors bidiagonalizing L in U
2328* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2329*
2330 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2331 $ work( iwork ), lwork-iwork+1, ierr )
2332 iwork = ie + m
2333*
2334* Perform bidiagonal QR iteration, computing left
2335* singular vectors of L in U, and computing right
2336* singular vectors of L in WORK(IR)
2337* (Workspace: need M*M+BDSPAC)
2338*
2339 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2340 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2341 $ work( iwork ), info )
2342 iu = ie + m
2343*
2344* Multiply right singular vectors of L in WORK(IR) by Q
2345* in A, storing result in WORK(IU) and copying to A
2346* (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2347*
2348 DO 40 i = 1, n, chunk
2349 blk = min( n-i+1, chunk )
2350 CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2351 $ ldwrkr, a( 1, i ), lda, zero,
2352 $ work( iu ), ldwrku )
2353 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2354 $ a( 1, i ), lda )
2355 40 CONTINUE
2356*
2357 ELSE
2358*
2359* Insufficient workspace for a fast algorithm
2360*
2361 itau = 1
2362 iwork = itau + m
2363*
2364* Compute A=L*Q
2365* (Workspace: need 2*M, prefer M+M*NB)
2366*
2367 CALL sgelqf( m, n, a, lda, work( itau ),
2368 $ work( iwork ), lwork-iwork+1, ierr )
2369*
2370* Copy L to U, zeroing out above it
2371*
2372 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2373 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2374 $ ldu )
2375*
2376* Generate Q in A
2377* (Workspace: need 2*M, prefer M+M*NB)
2378*
2379 CALL sorglq( m, n, m, a, lda, work( itau ),
2380 $ work( iwork ), lwork-iwork+1, ierr )
2381 ie = itau
2382 itauq = ie + m
2383 itaup = itauq + m
2384 iwork = itaup + m
2385*
2386* Bidiagonalize L in U
2387* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2388*
2389 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2390 $ work( itauq ), work( itaup ),
2391 $ work( iwork ), lwork-iwork+1, ierr )
2392*
2393* Multiply right vectors bidiagonalizing L by Q in A
2394* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2395*
2396 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2397 $ work( itaup ), a, lda, work( iwork ),
2398 $ lwork-iwork+1, ierr )
2399*
2400* Generate left vectors bidiagonalizing L in U
2401* (Workspace: need 4*M, prefer 3*M+M*NB)
2402*
2403 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2404 $ work( iwork ), lwork-iwork+1, ierr )
2405 iwork = ie + m
2406*
2407* Perform bidiagonal QR iteration, computing left
2408* singular vectors of A in U and computing right
2409* singular vectors of A in A
2410* (Workspace: need BDSPAC)
2411*
2412 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2413 $ u, ldu, dum, 1, work( iwork ), info )
2414*
2415 END IF
2416*
2417 ELSE IF( wntvs ) THEN
2418*
2419 IF( wntun ) THEN
2420*
2421* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2422* M right singular vectors to be computed in VT and
2423* no left singular vectors to be computed
2424*
2425 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2426*
2427* Sufficient workspace for a fast algorithm
2428*
2429 ir = 1
2430 IF( lwork.GE.wrkbl+lda*m ) THEN
2431*
2432* WORK(IR) is LDA by M
2433*
2434 ldwrkr = lda
2435 ELSE
2436*
2437* WORK(IR) is M by M
2438*
2439 ldwrkr = m
2440 END IF
2441 itau = ir + ldwrkr*m
2442 iwork = itau + m
2443*
2444* Compute A=L*Q
2445* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2446*
2447 CALL sgelqf( m, n, a, lda, work( itau ),
2448 $ work( iwork ), lwork-iwork+1, ierr )
2449*
2450* Copy L to WORK(IR), zeroing out above it
2451*
2452 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2453 $ ldwrkr )
2454 CALL slaset( 'U', m-1, m-1, zero, zero,
2455 $ work( ir+ldwrkr ), ldwrkr )
2456*
2457* Generate Q in A
2458* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2459*
2460 CALL sorglq( m, n, m, a, lda, work( itau ),
2461 $ work( iwork ), lwork-iwork+1, ierr )
2462 ie = itau
2463 itauq = ie + m
2464 itaup = itauq + m
2465 iwork = itaup + m
2466*
2467* Bidiagonalize L in WORK(IR)
2468* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2469*
2470 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2471 $ work( ie ), work( itauq ),
2472 $ work( itaup ), work( iwork ),
2473 $ lwork-iwork+1, ierr )
2474*
2475* Generate right vectors bidiagonalizing L in
2476* WORK(IR)
2477* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2478*
2479 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2480 $ work( itaup ), work( iwork ),
2481 $ lwork-iwork+1, ierr )
2482 iwork = ie + m
2483*
2484* Perform bidiagonal QR iteration, computing right
2485* singular vectors of L in WORK(IR)
2486* (Workspace: need M*M+BDSPAC)
2487*
2488 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2489 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2490 $ work( iwork ), info )
2491*
2492* Multiply right singular vectors of L in WORK(IR) by
2493* Q in A, storing result in VT
2494* (Workspace: need M*M)
2495*
2496 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2497 $ ldwrkr, a, lda, zero, vt, ldvt )
2498*
2499 ELSE
2500*
2501* Insufficient workspace for a fast algorithm
2502*
2503 itau = 1
2504 iwork = itau + m
2505*
2506* Compute A=L*Q
2507* (Workspace: need 2*M, prefer M+M*NB)
2508*
2509 CALL sgelqf( m, n, a, lda, work( itau ),
2510 $ work( iwork ), lwork-iwork+1, ierr )
2511*
2512* Copy result to VT
2513*
2514 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2515*
2516* Generate Q in VT
2517* (Workspace: need 2*M, prefer M+M*NB)
2518*
2519 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2520 $ work( iwork ), lwork-iwork+1, ierr )
2521 ie = itau
2522 itauq = ie + m
2523 itaup = itauq + m
2524 iwork = itaup + m
2525*
2526* Zero out above L in A
2527*
2528 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2529 $ lda )
2530*
2531* Bidiagonalize L in A
2532* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2533*
2534 CALL sgebrd( m, m, a, lda, s, work( ie ),
2535 $ work( itauq ), work( itaup ),
2536 $ work( iwork ), lwork-iwork+1, ierr )
2537*
2538* Multiply right vectors bidiagonalizing L by Q in VT
2539* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2540*
2541 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2542 $ work( itaup ), vt, ldvt,
2543 $ work( iwork ), lwork-iwork+1, ierr )
2544 iwork = ie + m
2545*
2546* Perform bidiagonal QR iteration, computing right
2547* singular vectors of A in VT
2548* (Workspace: need BDSPAC)
2549*
2550 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2551 $ ldvt, dum, 1, dum, 1, work( iwork ),
2552 $ info )
2553*
2554 END IF
2555*
2556 ELSE IF( wntuo ) THEN
2557*
2558* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2559* M right singular vectors to be computed in VT and
2560* M left singular vectors to be overwritten on A
2561*
2562 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2563*
2564* Sufficient workspace for a fast algorithm
2565*
2566 iu = 1
2567 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2568*
2569* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2570*
2571 ldwrku = lda
2572 ir = iu + ldwrku*m
2573 ldwrkr = lda
2574 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2575*
2576* WORK(IU) is LDA by M and WORK(IR) is M by M
2577*
2578 ldwrku = lda
2579 ir = iu + ldwrku*m
2580 ldwrkr = m
2581 ELSE
2582*
2583* WORK(IU) is M by M and WORK(IR) is M by M
2584*
2585 ldwrku = m
2586 ir = iu + ldwrku*m
2587 ldwrkr = m
2588 END IF
2589 itau = ir + ldwrkr*m
2590 iwork = itau + m
2591*
2592* Compute A=L*Q
2593* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2594*
2595 CALL sgelqf( m, n, a, lda, work( itau ),
2596 $ work( iwork ), lwork-iwork+1, ierr )
2597*
2598* Copy L to WORK(IU), zeroing out below it
2599*
2600 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2601 $ ldwrku )
2602 CALL slaset( 'U', m-1, m-1, zero, zero,
2603 $ work( iu+ldwrku ), ldwrku )
2604*
2605* Generate Q in A
2606* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2607*
2608 CALL sorglq( m, n, m, a, lda, work( itau ),
2609 $ work( iwork ), lwork-iwork+1, ierr )
2610 ie = itau
2611 itauq = ie + m
2612 itaup = itauq + m
2613 iwork = itaup + m
2614*
2615* Bidiagonalize L in WORK(IU), copying result to
2616* WORK(IR)
2617* (Workspace: need 2*M*M+4*M,
2618* prefer 2*M*M+3*M+2*M*NB)
2619*
2620 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2621 $ work( ie ), work( itauq ),
2622 $ work( itaup ), work( iwork ),
2623 $ lwork-iwork+1, ierr )
2624 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2625 $ work( ir ), ldwrkr )
2626*
2627* Generate right bidiagonalizing vectors in WORK(IU)
2628* (Workspace: need 2*M*M+4*M-1,
2629* prefer 2*M*M+3*M+(M-1)*NB)
2630*
2631 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2632 $ work( itaup ), work( iwork ),
2633 $ lwork-iwork+1, ierr )
2634*
2635* Generate left bidiagonalizing vectors in WORK(IR)
2636* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2637*
2638 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2639 $ work( itauq ), work( iwork ),
2640 $ lwork-iwork+1, ierr )
2641 iwork = ie + m
2642*
2643* Perform bidiagonal QR iteration, computing left
2644* singular vectors of L in WORK(IR) and computing
2645* right singular vectors of L in WORK(IU)
2646* (Workspace: need 2*M*M+BDSPAC)
2647*
2648 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2649 $ work( iu ), ldwrku, work( ir ),
2650 $ ldwrkr, dum, 1, work( iwork ), info )
2651*
2652* Multiply right singular vectors of L in WORK(IU) by
2653* Q in A, storing result in VT
2654* (Workspace: need M*M)
2655*
2656 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2657 $ ldwrku, a, lda, zero, vt, ldvt )
2658*
2659* Copy left singular vectors of L to A
2660* (Workspace: need M*M)
2661*
2662 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2663 $ lda )
2664*
2665 ELSE
2666*
2667* Insufficient workspace for a fast algorithm
2668*
2669 itau = 1
2670 iwork = itau + m
2671*
2672* Compute A=L*Q, copying result to VT
2673* (Workspace: need 2*M, prefer M+M*NB)
2674*
2675 CALL sgelqf( m, n, a, lda, work( itau ),
2676 $ work( iwork ), lwork-iwork+1, ierr )
2677 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2678*
2679* Generate Q in VT
2680* (Workspace: need 2*M, prefer M+M*NB)
2681*
2682 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2683 $ work( iwork ), lwork-iwork+1, ierr )
2684 ie = itau
2685 itauq = ie + m
2686 itaup = itauq + m
2687 iwork = itaup + m
2688*
2689* Zero out above L in A
2690*
2691 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2692 $ lda )
2693*
2694* Bidiagonalize L in A
2695* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2696*
2697 CALL sgebrd( m, m, a, lda, s, work( ie ),
2698 $ work( itauq ), work( itaup ),
2699 $ work( iwork ), lwork-iwork+1, ierr )
2700*
2701* Multiply right vectors bidiagonalizing L by Q in VT
2702* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2703*
2704 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2705 $ work( itaup ), vt, ldvt,
2706 $ work( iwork ), lwork-iwork+1, ierr )
2707*
2708* Generate left bidiagonalizing vectors of L in A
2709* (Workspace: need 4*M, prefer 3*M+M*NB)
2710*
2711 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2712 $ work( iwork ), lwork-iwork+1, ierr )
2713 iwork = ie + m
2714*
2715* Perform bidiagonal QR iteration, compute left
2716* singular vectors of A in A and compute right
2717* singular vectors of A in VT
2718* (Workspace: need BDSPAC)
2719*
2720 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2721 $ ldvt, a, lda, dum, 1, work( iwork ),
2722 $ info )
2723*
2724 END IF
2725*
2726 ELSE IF( wntuas ) THEN
2727*
2728* Path 6t(N much larger than M, JOBU='S' or 'A',
2729* JOBVT='S')
2730* M right singular vectors to be computed in VT and
2731* M left singular vectors to be computed in U
2732*
2733 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2734*
2735* Sufficient workspace for a fast algorithm
2736*
2737 iu = 1
2738 IF( lwork.GE.wrkbl+lda*m ) THEN
2739*
2740* WORK(IU) is LDA by N
2741*
2742 ldwrku = lda
2743 ELSE
2744*
2745* WORK(IU) is LDA by M
2746*
2747 ldwrku = m
2748 END IF
2749 itau = iu + ldwrku*m
2750 iwork = itau + m
2751*
2752* Compute A=L*Q
2753* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2754*
2755 CALL sgelqf( m, n, a, lda, work( itau ),
2756 $ work( iwork ), lwork-iwork+1, ierr )
2757*
2758* Copy L to WORK(IU), zeroing out above it
2759*
2760 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2761 $ ldwrku )
2762 CALL slaset( 'U', m-1, m-1, zero, zero,
2763 $ work( iu+ldwrku ), ldwrku )
2764*
2765* Generate Q in A
2766* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2767*
2768 CALL sorglq( m, n, m, a, lda, work( itau ),
2769 $ work( iwork ), lwork-iwork+1, ierr )
2770 ie = itau
2771 itauq = ie + m
2772 itaup = itauq + m
2773 iwork = itaup + m
2774*
2775* Bidiagonalize L in WORK(IU), copying result to U
2776* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2777*
2778 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2779 $ work( ie ), work( itauq ),
2780 $ work( itaup ), work( iwork ),
2781 $ lwork-iwork+1, ierr )
2782 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2783 $ ldu )
2784*
2785* Generate right bidiagonalizing vectors in WORK(IU)
2786* (Workspace: need M*M+4*M-1,
2787* prefer M*M+3*M+(M-1)*NB)
2788*
2789 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2790 $ work( itaup ), work( iwork ),
2791 $ lwork-iwork+1, ierr )
2792*
2793* Generate left bidiagonalizing vectors in U
2794* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2795*
2796 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2797 $ work( iwork ), lwork-iwork+1, ierr )
2798 iwork = ie + m
2799*
2800* Perform bidiagonal QR iteration, computing left
2801* singular vectors of L in U and computing right
2802* singular vectors of L in WORK(IU)
2803* (Workspace: need M*M+BDSPAC)
2804*
2805 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2806 $ work( iu ), ldwrku, u, ldu, dum, 1,
2807 $ work( iwork ), info )
2808*
2809* Multiply right singular vectors of L in WORK(IU) by
2810* Q in A, storing result in VT
2811* (Workspace: need M*M)
2812*
2813 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2814 $ ldwrku, a, lda, zero, vt, ldvt )
2815*
2816 ELSE
2817*
2818* Insufficient workspace for a fast algorithm
2819*
2820 itau = 1
2821 iwork = itau + m
2822*
2823* Compute A=L*Q, copying result to VT
2824* (Workspace: need 2*M, prefer M+M*NB)
2825*
2826 CALL sgelqf( m, n, a, lda, work( itau ),
2827 $ work( iwork ), lwork-iwork+1, ierr )
2828 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2829*
2830* Generate Q in VT
2831* (Workspace: need 2*M, prefer M+M*NB)
2832*
2833 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2834 $ work( iwork ), lwork-iwork+1, ierr )
2835*
2836* Copy L to U, zeroing out above it
2837*
2838 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2839 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2840 $ ldu )
2841 ie = itau
2842 itauq = ie + m
2843 itaup = itauq + m
2844 iwork = itaup + m
2845*
2846* Bidiagonalize L in U
2847* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2848*
2849 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2850 $ work( itauq ), work( itaup ),
2851 $ work( iwork ), lwork-iwork+1, ierr )
2852*
2853* Multiply right bidiagonalizing vectors in U by Q
2854* in VT
2855* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2856*
2857 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2858 $ work( itaup ), vt, ldvt,
2859 $ work( iwork ), lwork-iwork+1, ierr )
2860*
2861* Generate left bidiagonalizing vectors in U
2862* (Workspace: need 4*M, prefer 3*M+M*NB)
2863*
2864 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2865 $ work( iwork ), lwork-iwork+1, ierr )
2866 iwork = ie + m
2867*
2868* Perform bidiagonal QR iteration, computing left
2869* singular vectors of A in U and computing right
2870* singular vectors of A in VT
2871* (Workspace: need BDSPAC)
2872*
2873 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2874 $ ldvt, u, ldu, dum, 1, work( iwork ),
2875 $ info )
2876*
2877 END IF
2878*
2879 END IF
2880*
2881 ELSE IF( wntva ) THEN
2882*
2883 IF( wntun ) THEN
2884*
2885* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2886* N right singular vectors to be computed in VT and
2887* no left singular vectors to be computed
2888*
2889 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2890*
2891* Sufficient workspace for a fast algorithm
2892*
2893 ir = 1
2894 IF( lwork.GE.wrkbl+lda*m ) THEN
2895*
2896* WORK(IR) is LDA by M
2897*
2898 ldwrkr = lda
2899 ELSE
2900*
2901* WORK(IR) is M by M
2902*
2903 ldwrkr = m
2904 END IF
2905 itau = ir + ldwrkr*m
2906 iwork = itau + m
2907*
2908* Compute A=L*Q, copying result to VT
2909* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2910*
2911 CALL sgelqf( m, n, a, lda, work( itau ),
2912 $ work( iwork ), lwork-iwork+1, ierr )
2913 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2914*
2915* Copy L to WORK(IR), zeroing out above it
2916*
2917 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2918 $ ldwrkr )
2919 CALL slaset( 'U', m-1, m-1, zero, zero,
2920 $ work( ir+ldwrkr ), ldwrkr )
2921*
2922* Generate Q in VT
2923* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2924*
2925 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2926 $ work( iwork ), lwork-iwork+1, ierr )
2927 ie = itau
2928 itauq = ie + m
2929 itaup = itauq + m
2930 iwork = itaup + m
2931*
2932* Bidiagonalize L in WORK(IR)
2933* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2934*
2935 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2936 $ work( ie ), work( itauq ),
2937 $ work( itaup ), work( iwork ),
2938 $ lwork-iwork+1, ierr )
2939*
2940* Generate right bidiagonalizing vectors in WORK(IR)
2941* (Workspace: need M*M+4*M-1,
2942* prefer M*M+3*M+(M-1)*NB)
2943*
2944 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2945 $ work( itaup ), work( iwork ),
2946 $ lwork-iwork+1, ierr )
2947 iwork = ie + m
2948*
2949* Perform bidiagonal QR iteration, computing right
2950* singular vectors of L in WORK(IR)
2951* (Workspace: need M*M+BDSPAC)
2952*
2953 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2954 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2955 $ work( iwork ), info )
2956*
2957* Multiply right singular vectors of L in WORK(IR) by
2958* Q in VT, storing result in A
2959* (Workspace: need M*M)
2960*
2961 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2962 $ ldwrkr, vt, ldvt, zero, a, lda )
2963*
2964* Copy right singular vectors of A from A to VT
2965*
2966 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2967*
2968 ELSE
2969*
2970* Insufficient workspace for a fast algorithm
2971*
2972 itau = 1
2973 iwork = itau + m
2974*
2975* Compute A=L*Q, copying result to VT
2976* (Workspace: need 2*M, prefer M+M*NB)
2977*
2978 CALL sgelqf( m, n, a, lda, work( itau ),
2979 $ work( iwork ), lwork-iwork+1, ierr )
2980 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2981*
2982* Generate Q in VT
2983* (Workspace: need M+N, prefer M+N*NB)
2984*
2985 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2986 $ work( iwork ), lwork-iwork+1, ierr )
2987 ie = itau
2988 itauq = ie + m
2989 itaup = itauq + m
2990 iwork = itaup + m
2991*
2992* Zero out above L in A
2993*
2994 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2995 $ lda )
2996*
2997* Bidiagonalize L in A
2998* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2999*
3000 CALL sgebrd( m, m, a, lda, s, work( ie ),
3001 $ work( itauq ), work( itaup ),
3002 $ work( iwork ), lwork-iwork+1, ierr )
3003*
3004* Multiply right bidiagonalizing vectors in A by Q
3005* in VT
3006* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3007*
3008 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3009 $ work( itaup ), vt, ldvt,
3010 $ work( iwork ), lwork-iwork+1, ierr )
3011 iwork = ie + m
3012*
3013* Perform bidiagonal QR iteration, computing right
3014* singular vectors of A in VT
3015* (Workspace: need BDSPAC)
3016*
3017 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3018 $ ldvt, dum, 1, dum, 1, work( iwork ),
3019 $ info )
3020*
3021 END IF
3022*
3023 ELSE IF( wntuo ) THEN
3024*
3025* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3026* N right singular vectors to be computed in VT and
3027* M left singular vectors to be overwritten on A
3028*
3029 IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3030*
3031* Sufficient workspace for a fast algorithm
3032*
3033 iu = 1
3034 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3035*
3036* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3037*
3038 ldwrku = lda
3039 ir = iu + ldwrku*m
3040 ldwrkr = lda
3041 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3042*
3043* WORK(IU) is LDA by M and WORK(IR) is M by M
3044*
3045 ldwrku = lda
3046 ir = iu + ldwrku*m
3047 ldwrkr = m
3048 ELSE
3049*
3050* WORK(IU) is M by M and WORK(IR) is M by M
3051*
3052 ldwrku = m
3053 ir = iu + ldwrku*m
3054 ldwrkr = m
3055 END IF
3056 itau = ir + ldwrkr*m
3057 iwork = itau + m
3058*
3059* Compute A=L*Q, copying result to VT
3060* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3061*
3062 CALL sgelqf( m, n, a, lda, work( itau ),
3063 $ work( iwork ), lwork-iwork+1, ierr )
3064 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3065*
3066* Generate Q in VT
3067* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3068*
3069 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3070 $ work( iwork ), lwork-iwork+1, ierr )
3071*
3072* Copy L to WORK(IU), zeroing out above it
3073*
3074 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3075 $ ldwrku )
3076 CALL slaset( 'U', m-1, m-1, zero, zero,
3077 $ work( iu+ldwrku ), ldwrku )
3078 ie = itau
3079 itauq = ie + m
3080 itaup = itauq + m
3081 iwork = itaup + m
3082*
3083* Bidiagonalize L in WORK(IU), copying result to
3084* WORK(IR)
3085* (Workspace: need 2*M*M+4*M,
3086* prefer 2*M*M+3*M+2*M*NB)
3087*
3088 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3089 $ work( ie ), work( itauq ),
3090 $ work( itaup ), work( iwork ),
3091 $ lwork-iwork+1, ierr )
3092 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3093 $ work( ir ), ldwrkr )
3094*
3095* Generate right bidiagonalizing vectors in WORK(IU)
3096* (Workspace: need 2*M*M+4*M-1,
3097* prefer 2*M*M+3*M+(M-1)*NB)
3098*
3099 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3100 $ work( itaup ), work( iwork ),
3101 $ lwork-iwork+1, ierr )
3102*
3103* Generate left bidiagonalizing vectors in WORK(IR)
3104* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3105*
3106 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3107 $ work( itauq ), work( iwork ),
3108 $ lwork-iwork+1, ierr )
3109 iwork = ie + m
3110*
3111* Perform bidiagonal QR iteration, computing left
3112* singular vectors of L in WORK(IR) and computing
3113* right singular vectors of L in WORK(IU)
3114* (Workspace: need 2*M*M+BDSPAC)
3115*
3116 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3117 $ work( iu ), ldwrku, work( ir ),
3118 $ ldwrkr, dum, 1, work( iwork ), info )
3119*
3120* Multiply right singular vectors of L in WORK(IU) by
3121* Q in VT, storing result in A
3122* (Workspace: need M*M)
3123*
3124 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3125 $ ldwrku, vt, ldvt, zero, a, lda )
3126*
3127* Copy right singular vectors of A from A to VT
3128*
3129 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3130*
3131* Copy left singular vectors of A from WORK(IR) to A
3132*
3133 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3134 $ lda )
3135*
3136 ELSE
3137*
3138* Insufficient workspace for a fast algorithm
3139*
3140 itau = 1
3141 iwork = itau + m
3142*
3143* Compute A=L*Q, copying result to VT
3144* (Workspace: need 2*M, prefer M+M*NB)
3145*
3146 CALL sgelqf( m, n, a, lda, work( itau ),
3147 $ work( iwork ), lwork-iwork+1, ierr )
3148 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3149*
3150* Generate Q in VT
3151* (Workspace: need M+N, prefer M+N*NB)
3152*
3153 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3154 $ work( iwork ), lwork-iwork+1, ierr )
3155 ie = itau
3156 itauq = ie + m
3157 itaup = itauq + m
3158 iwork = itaup + m
3159*
3160* Zero out above L in A
3161*
3162 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3163 $ lda )
3164*
3165* Bidiagonalize L in A
3166* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3167*
3168 CALL sgebrd( m, m, a, lda, s, work( ie ),
3169 $ work( itauq ), work( itaup ),
3170 $ work( iwork ), lwork-iwork+1, ierr )
3171*
3172* Multiply right bidiagonalizing vectors in A by Q
3173* in VT
3174* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3175*
3176 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3177 $ work( itaup ), vt, ldvt,
3178 $ work( iwork ), lwork-iwork+1, ierr )
3179*
3180* Generate left bidiagonalizing vectors in A
3181* (Workspace: need 4*M, prefer 3*M+M*NB)
3182*
3183 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3184 $ work( iwork ), lwork-iwork+1, ierr )
3185 iwork = ie + m
3186*
3187* Perform bidiagonal QR iteration, computing left
3188* singular vectors of A in A and computing right
3189* singular vectors of A in VT
3190* (Workspace: need BDSPAC)
3191*
3192 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3193 $ ldvt, a, lda, dum, 1, work( iwork ),
3194 $ info )
3195*
3196 END IF
3197*
3198 ELSE IF( wntuas ) THEN
3199*
3200* Path 9t(N much larger than M, JOBU='S' or 'A',
3201* JOBVT='A')
3202* N right singular vectors to be computed in VT and
3203* M left singular vectors to be computed in U
3204*
3205 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3206*
3207* Sufficient workspace for a fast algorithm
3208*
3209 iu = 1
3210 IF( lwork.GE.wrkbl+lda*m ) THEN
3211*
3212* WORK(IU) is LDA by M
3213*
3214 ldwrku = lda
3215 ELSE
3216*
3217* WORK(IU) is M by M
3218*
3219 ldwrku = m
3220 END IF
3221 itau = iu + ldwrku*m
3222 iwork = itau + m
3223*
3224* Compute A=L*Q, copying result to VT
3225* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3226*
3227 CALL sgelqf( m, n, a, lda, work( itau ),
3228 $ work( iwork ), lwork-iwork+1, ierr )
3229 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3230*
3231* Generate Q in VT
3232* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3233*
3234 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3235 $ work( iwork ), lwork-iwork+1, ierr )
3236*
3237* Copy L to WORK(IU), zeroing out above it
3238*
3239 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3240 $ ldwrku )
3241 CALL slaset( 'U', m-1, m-1, zero, zero,
3242 $ work( iu+ldwrku ), ldwrku )
3243 ie = itau
3244 itauq = ie + m
3245 itaup = itauq + m
3246 iwork = itaup + m
3247*
3248* Bidiagonalize L in WORK(IU), copying result to U
3249* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3250*
3251 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3252 $ work( ie ), work( itauq ),
3253 $ work( itaup ), work( iwork ),
3254 $ lwork-iwork+1, ierr )
3255 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3256 $ ldu )
3257*
3258* Generate right bidiagonalizing vectors in WORK(IU)
3259* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3260*
3261 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3262 $ work( itaup ), work( iwork ),
3263 $ lwork-iwork+1, ierr )
3264*
3265* Generate left bidiagonalizing vectors in U
3266* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3267*
3268 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3269 $ work( iwork ), lwork-iwork+1, ierr )
3270 iwork = ie + m
3271*
3272* Perform bidiagonal QR iteration, computing left
3273* singular vectors of L in U and computing right
3274* singular vectors of L in WORK(IU)
3275* (Workspace: need M*M+BDSPAC)
3276*
3277 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3278 $ work( iu ), ldwrku, u, ldu, dum, 1,
3279 $ work( iwork ), info )
3280*
3281* Multiply right singular vectors of L in WORK(IU) by
3282* Q in VT, storing result in A
3283* (Workspace: need M*M)
3284*
3285 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3286 $ ldwrku, vt, ldvt, zero, a, lda )
3287*
3288* Copy right singular vectors of A from A to VT
3289*
3290 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3291*
3292 ELSE
3293*
3294* Insufficient workspace for a fast algorithm
3295*
3296 itau = 1
3297 iwork = itau + m
3298*
3299* Compute A=L*Q, copying result to VT
3300* (Workspace: need 2*M, prefer M+M*NB)
3301*
3302 CALL sgelqf( m, n, a, lda, work( itau ),
3303 $ work( iwork ), lwork-iwork+1, ierr )
3304 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3305*
3306* Generate Q in VT
3307* (Workspace: need M+N, prefer M+N*NB)
3308*
3309 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3310 $ work( iwork ), lwork-iwork+1, ierr )
3311*
3312* Copy L to U, zeroing out above it
3313*
3314 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3315 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3316 $ ldu )
3317 ie = itau
3318 itauq = ie + m
3319 itaup = itauq + m
3320 iwork = itaup + m
3321*
3322* Bidiagonalize L in U
3323* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3324*
3325 CALL sgebrd( m, m, u, ldu, s, work( ie ),
3326 $ work( itauq ), work( itaup ),
3327 $ work( iwork ), lwork-iwork+1, ierr )
3328*
3329* Multiply right bidiagonalizing vectors in U by Q
3330* in VT
3331* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3332*
3333 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3334 $ work( itaup ), vt, ldvt,
3335 $ work( iwork ), lwork-iwork+1, ierr )
3336*
3337* Generate left bidiagonalizing vectors in U
3338* (Workspace: need 4*M, prefer 3*M+M*NB)
3339*
3340 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3341 $ work( iwork ), lwork-iwork+1, ierr )
3342 iwork = ie + m
3343*
3344* Perform bidiagonal QR iteration, computing left
3345* singular vectors of A in U and computing right
3346* singular vectors of A in VT
3347* (Workspace: need BDSPAC)
3348*
3349 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3350 $ ldvt, u, ldu, dum, 1, work( iwork ),
3351 $ info )
3352*
3353 END IF
3354*
3355 END IF
3356*
3357 END IF
3358*
3359 ELSE
3360*
3361* N .LT. MNTHR
3362*
3363* Path 10t(N greater than M, but not much larger)
3364* Reduce to bidiagonal form without LQ decomposition
3365*
3366 ie = 1
3367 itauq = ie + m
3368 itaup = itauq + m
3369 iwork = itaup + m
3370*
3371* Bidiagonalize A
3372* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3373*
3374 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3375 $ work( itaup ), work( iwork ), lwork-iwork+1,
3376 $ ierr )
3377 IF( wntuas ) THEN
3378*
3379* If left singular vectors desired in U, copy result to U
3380* and generate left bidiagonalizing vectors in U
3381* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3382*
3383 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3384 CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3385 $ work( iwork ), lwork-iwork+1, ierr )
3386 END IF
3387 IF( wntvas ) THEN
3388*
3389* If right singular vectors desired in VT, copy result to
3390* VT and generate right bidiagonalizing vectors in VT
3391* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3392*
3393 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3394 IF( wntva )
3395 $ nrvt = n
3396 IF( wntvs )
3397 $ nrvt = m
3398 CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3399 $ work( iwork ), lwork-iwork+1, ierr )
3400 END IF
3401 IF( wntuo ) THEN
3402*
3403* If left singular vectors desired in A, generate left
3404* bidiagonalizing vectors in A
3405* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3406*
3407 CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3408 $ work( iwork ), lwork-iwork+1, ierr )
3409 END IF
3410 IF( wntvo ) THEN
3411*
3412* If right singular vectors desired in A, generate right
3413* bidiagonalizing vectors in A
3414* (Workspace: need 4*M, prefer 3*M+M*NB)
3415*
3416 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3417 $ work( iwork ), lwork-iwork+1, ierr )
3418 END IF
3419 iwork = ie + m
3420 IF( wntuas .OR. wntuo )
3421 $ nru = m
3422 IF( wntun )
3423 $ nru = 0
3424 IF( wntvas .OR. wntvo )
3425 $ ncvt = n
3426 IF( wntvn )
3427 $ ncvt = 0
3428 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3429*
3430* Perform bidiagonal QR iteration, if desired, computing
3431* left singular vectors in U and computing right singular
3432* vectors in VT
3433* (Workspace: need BDSPAC)
3434*
3435 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3436 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3437 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3438*
3439* Perform bidiagonal QR iteration, if desired, computing
3440* left singular vectors in U and computing right singular
3441* vectors in A
3442* (Workspace: need BDSPAC)
3443*
3444 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3445 $ u, ldu, dum, 1, work( iwork ), info )
3446 ELSE
3447*
3448* Perform bidiagonal QR iteration, if desired, computing
3449* left singular vectors in A and computing right singular
3450* vectors in VT
3451* (Workspace: need BDSPAC)
3452*
3453 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3454 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3455 END IF
3456*
3457 END IF
3458*
3459 END IF
3460*
3461* If SBDSQR failed to converge, copy unconverged superdiagonals
3462* to WORK( 2:MINMN )
3463*
3464 IF( info.NE.0 ) THEN
3465 IF( ie.GT.2 ) THEN
3466 DO 50 i = 1, minmn - 1
3467 work( i+1 ) = work( i+ie-1 )
3468 50 CONTINUE
3469 END IF
3470 IF( ie.LT.2 ) THEN
3471 DO 60 i = minmn - 1, 1, -1
3472 work( i+1 ) = work( i+ie-1 )
3473 60 CONTINUE
3474 END IF
3475 END IF
3476*
3477* Undo scaling if necessary
3478*
3479 IF( iscl.EQ.1 ) THEN
3480 IF( anrm.GT.bignum )
3481 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3482 $ ierr )
3483 IF( info.NE.0 .AND. anrm.GT.bignum )
3484 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3485 $ minmn, ierr )
3486 IF( anrm.LT.smlnum )
3487 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3488 $ ierr )
3489 IF( info.NE.0 .AND. anrm.LT.smlnum )
3490 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3491 $ minmn, ierr )
3492 END IF
3493*
3494* Return optimal workspace in WORK(1)
3495*
3496 work( 1 ) = sroundup_lwork(maxwrk)
3497*
3498 RETURN
3499*
3500* End of SGESVD
3501*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine sbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
SBDSQR
Definition sbdsqr.f:240
subroutine sgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
SGEBRD
Definition sgebrd.f:205
subroutine sgelqf(m, n, a, lda, tau, work, lwork, info)
SGELQF
Definition sgelqf.f:143
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sgeqrf(m, n, a, lda, tau, work, lwork, info)
SGEQRF
Definition sgeqrf.f:146
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
real function slange(norm, m, n, a, lda, work)
SLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition slange.f:114
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:143
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real function sroundup_lwork(lwork)
SROUNDUP_LWORK
subroutine sorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
SORGBR
Definition sorgbr.f:157
subroutine sorglq(m, n, k, a, lda, tau, work, lwork, info)
SORGLQ
Definition sorglq.f:127
subroutine sorgqr(m, n, k, a, lda, tau, work, lwork, info)
SORGQR
Definition sorgqr.f:128
subroutine sormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
SORMBR
Definition sormbr.f:196
Here is the call graph for this function:
Here is the caller graph for this function: