LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ dgesvd()

subroutine dgesvd ( character  jobu,
character  jobvt,
integer  m,
integer  n,
double precision, dimension( lda, * )  a,
integer  lda,
double precision, dimension( * )  s,
double precision, dimension( ldu, * )  u,
integer  ldu,
double precision, dimension( ldvt, * )  vt,
integer  ldvt,
double precision, dimension( * )  work,
integer  lwork,
integer  info 
)

DGESVD computes the singular value decomposition (SVD) for GE matrices

Download DGESVD + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 DGESVD computes the singular value decomposition (SVD) of a real
 M-by-N matrix A, optionally computing the left and/or right singular
 vectors. The SVD is written

      A = U * SIGMA * transpose(V)

 where SIGMA is an M-by-N matrix which is zero except for its
 min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
 V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
 are the singular values of A; they are real and non-negative, and
 are returned in descending order.  The first min(m,n) columns of
 U and V are the left and right singular vectors of A.

 Note that the routine returns V**T, not V.
Parameters
[in]JOBU
          JOBU is CHARACTER*1
          Specifies options for computing all or part of the matrix U:
          = 'A':  all M columns of U are returned in array U:
          = 'S':  the first min(m,n) columns of U (the left singular
                  vectors) are returned in the array U;
          = 'O':  the first min(m,n) columns of U (the left singular
                  vectors) are overwritten on the array A;
          = 'N':  no columns of U (no left singular vectors) are
                  computed.
[in]JOBVT
          JOBVT is CHARACTER*1
          Specifies options for computing all or part of the matrix
          V**T:
          = 'A':  all N rows of V**T are returned in the array VT;
          = 'S':  the first min(m,n) rows of V**T (the right singular
                  vectors) are returned in the array VT;
          = 'O':  the first min(m,n) rows of V**T (the right singular
                  vectors) are overwritten on the array A;
          = 'N':  no rows of V**T (no right singular vectors) are
                  computed.

          JOBVT and JOBU cannot both be 'O'.
[in]M
          M is INTEGER
          The number of rows of the input matrix A.  M >= 0.
[in]N
          N is INTEGER
          The number of columns of the input matrix A.  N >= 0.
[in,out]A
          A is DOUBLE PRECISION array, dimension (LDA,N)
          On entry, the M-by-N matrix A.
          On exit,
          if JOBU = 'O',  A is overwritten with the first min(m,n)
                          columns of U (the left singular vectors,
                          stored columnwise);
          if JOBVT = 'O', A is overwritten with the first min(m,n)
                          rows of V**T (the right singular vectors,
                          stored rowwise);
          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
                          are destroyed.
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,M).
[out]S
          S is DOUBLE PRECISION array, dimension (min(M,N))
          The singular values of A, sorted so that S(i) >= S(i+1).
[out]U
          U is DOUBLE PRECISION array, dimension (LDU,UCOL)
          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
          if JOBU = 'S', U contains the first min(m,n) columns of U
          (the left singular vectors, stored columnwise);
          if JOBU = 'N' or 'O', U is not referenced.
[in]LDU
          LDU is INTEGER
          The leading dimension of the array U.  LDU >= 1; if
          JOBU = 'S' or 'A', LDU >= M.
[out]VT
          VT is DOUBLE PRECISION array, dimension (LDVT,N)
          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
          V**T;
          if JOBVT = 'S', VT contains the first min(m,n) rows of
          V**T (the right singular vectors, stored rowwise);
          if JOBVT = 'N' or 'O', VT is not referenced.
[in]LDVT
          LDVT is INTEGER
          The leading dimension of the array VT.  LDVT >= 1; if
          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
[out]WORK
          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
          superdiagonal elements of an upper bidiagonal matrix B
          whose diagonal is in S (not necessarily sorted). B
          satisfies A = U * B * VT, so it has the same singular values
          as A, and singular vectors related by U and VT.
[in]LWORK
          LWORK is INTEGER
          The dimension of the array WORK.
          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
             - PATH 1  (M much larger than N, JOBU='N')
             - PATH 1t (N much larger than M, JOBVT='N')
          LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
          For good performance, LWORK should generally be larger.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit.
          < 0:  if INFO = -i, the i-th argument had an illegal value.
          > 0:  if DBDSQR did not converge, INFO specifies how many
                superdiagonals of an intermediate bidiagonal form B
                did not converge to zero. See the description of WORK
                above for details.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 209 of file dgesvd.f.

211*
212* -- LAPACK driver routine --
213* -- LAPACK is a software package provided by Univ. of Tennessee, --
214* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216* .. Scalar Arguments ..
217 CHARACTER JOBU, JOBVT
218 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219* ..
220* .. Array Arguments ..
221 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
222 $ VT( LDVT, * ), WORK( * )
223* ..
224*
225* =====================================================================
226*
227* .. Parameters ..
228 DOUBLE PRECISION ZERO, ONE
229 parameter( zero = 0.0d0, one = 1.0d0 )
230* ..
231* .. Local Scalars ..
232 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233 $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
234 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235 $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
236 $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
237 $ NRVT, WRKBL
238 INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
239 $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
240 $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
241 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
242* ..
243* .. Local Arrays ..
244 DOUBLE PRECISION DUM( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 DOUBLE PRECISION DLAMCH, DLANGE
255 EXTERNAL lsame, ilaenv, dlamch, dlange
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC max, min, sqrt
259* ..
260* .. Executable Statements ..
261*
262* Test the input arguments
263*
264 info = 0
265 minmn = min( m, n )
266 wntua = lsame( jobu, 'A' )
267 wntus = lsame( jobu, 'S' )
268 wntuas = wntua .OR. wntus
269 wntuo = lsame( jobu, 'O' )
270 wntun = lsame( jobu, 'N' )
271 wntva = lsame( jobvt, 'A' )
272 wntvs = lsame( jobvt, 'S' )
273 wntvas = wntva .OR. wntvs
274 wntvo = lsame( jobvt, 'O' )
275 wntvn = lsame( jobvt, 'N' )
276 lquery = ( lwork.EQ.-1 )
277*
278 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279 info = -1
280 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281 $ ( wntvo .AND. wntuo ) ) THEN
282 info = -2
283 ELSE IF( m.LT.0 ) THEN
284 info = -3
285 ELSE IF( n.LT.0 ) THEN
286 info = -4
287 ELSE IF( lda.LT.max( 1, m ) ) THEN
288 info = -6
289 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290 info = -9
291 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293 info = -11
294 END IF
295*
296* Compute workspace
297* (Note: Comments in the code beginning "Workspace:" describe the
298* minimal amount of workspace needed at that point in the code,
299* as well as the preferred amount for good performance.
300* NB refers to the optimal block size for the immediately
301* following subroutine, as returned by ILAENV.)
302*
303 IF( info.EQ.0 ) THEN
304 minwrk = 1
305 maxwrk = 1
306 IF( m.GE.n .AND. minmn.GT.0 ) THEN
307*
308* Compute space needed for DBDSQR
309*
310 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
311 bdspac = 5*n
312* Compute space needed for DGEQRF
313 CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314 lwork_dgeqrf = int( dum(1) )
315* Compute space needed for DORGQR
316 CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317 lwork_dorgqr_n = int( dum(1) )
318 CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319 lwork_dorgqr_m = int( dum(1) )
320* Compute space needed for DGEBRD
321 CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
322 $ dum(1), dum(1), -1, ierr )
323 lwork_dgebrd = int( dum(1) )
324* Compute space needed for DORGBR P
325 CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
326 $ dum(1), -1, ierr )
327 lwork_dorgbr_p = int( dum(1) )
328* Compute space needed for DORGBR Q
329 CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
330 $ dum(1), -1, ierr )
331 lwork_dorgbr_q = int( dum(1) )
332*
333 IF( m.GE.mnthr ) THEN
334 IF( wntun ) THEN
335*
336* Path 1 (M much larger than N, JOBU='N')
337*
338 maxwrk = n + lwork_dgeqrf
339 maxwrk = max( maxwrk, 3*n + lwork_dgebrd )
340 IF( wntvo .OR. wntvas )
341 $ maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
342 maxwrk = max( maxwrk, bdspac )
343 minwrk = max( 4*n, bdspac )
344 ELSE IF( wntuo .AND. wntvn ) THEN
345*
346* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348 wrkbl = n + lwork_dgeqrf
349 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
350 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
351 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
352 wrkbl = max( wrkbl, bdspac )
353 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
354 minwrk = max( 3*n + m, bdspac )
355 ELSE IF( wntuo .AND. wntvas ) THEN
356*
357* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358* 'A')
359*
360 wrkbl = n + lwork_dgeqrf
361 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
362 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
363 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
364 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
365 wrkbl = max( wrkbl, bdspac )
366 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
367 minwrk = max( 3*n + m, bdspac )
368 ELSE IF( wntus .AND. wntvn ) THEN
369*
370* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372 wrkbl = n + lwork_dgeqrf
373 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
374 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
375 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
376 wrkbl = max( wrkbl, bdspac )
377 maxwrk = n*n + wrkbl
378 minwrk = max( 3*n + m, bdspac )
379 ELSE IF( wntus .AND. wntvo ) THEN
380*
381* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383 wrkbl = n + lwork_dgeqrf
384 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
385 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
386 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
387 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
388 wrkbl = max( wrkbl, bdspac )
389 maxwrk = 2*n*n + wrkbl
390 minwrk = max( 3*n + m, bdspac )
391 ELSE IF( wntus .AND. wntvas ) THEN
392*
393* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394* 'A')
395*
396 wrkbl = n + lwork_dgeqrf
397 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
398 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
399 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
400 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
401 wrkbl = max( wrkbl, bdspac )
402 maxwrk = n*n + wrkbl
403 minwrk = max( 3*n + m, bdspac )
404 ELSE IF( wntua .AND. wntvn ) THEN
405*
406* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408 wrkbl = n + lwork_dgeqrf
409 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
410 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
411 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
412 wrkbl = max( wrkbl, bdspac )
413 maxwrk = n*n + wrkbl
414 minwrk = max( 3*n + m, bdspac )
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_dgeqrf
420 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
421 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
422 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
423 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
424 wrkbl = max( wrkbl, bdspac )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = max( 3*n + m, bdspac )
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_dgeqrf
433 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
434 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
435 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
436 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
437 wrkbl = max( wrkbl, bdspac )
438 maxwrk = n*n + wrkbl
439 minwrk = max( 3*n + m, bdspac )
440 END IF
441 ELSE
442*
443* Path 10 (M at least N, but not much larger)
444*
445 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
446 $ dum(1), dum(1), -1, ierr )
447 lwork_dgebrd = int( dum(1) )
448 maxwrk = 3*n + lwork_dgebrd
449 IF( wntus .OR. wntuo ) THEN
450 CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
451 $ dum(1), -1, ierr )
452 lwork_dorgbr_q = int( dum(1) )
453 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
454 END IF
455 IF( wntua ) THEN
456 CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
457 $ dum(1), -1, ierr )
458 lwork_dorgbr_q = int( dum(1) )
459 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
460 END IF
461 IF( .NOT.wntvn ) THEN
462 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
463 END IF
464 maxwrk = max( maxwrk, bdspac )
465 minwrk = max( 3*n + m, bdspac )
466 END IF
467 ELSE IF( minmn.GT.0 ) THEN
468*
469* Compute space needed for DBDSQR
470*
471 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
472 bdspac = 5*m
473* Compute space needed for DGELQF
474 CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475 lwork_dgelqf = int( dum(1) )
476* Compute space needed for DORGLQ
477 CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478 lwork_dorglq_n = int( dum(1) )
479 CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_dorglq_m = int( dum(1) )
481* Compute space needed for DGEBRD
482 CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_dgebrd = int( dum(1) )
485* Compute space needed for DORGBR P
486 CALL dorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_dorgbr_p = int( dum(1) )
489* Compute space needed for DORGBR Q
490 CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_dorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_dgelqf
499 maxwrk = max( maxwrk, 3*m + lwork_dgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_dgelqf
509 wrkbl = max( wrkbl, m + lwork_dorglq_m )
510 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
511 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
514 minwrk = max( 3*m + n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_dgelqf
521 wrkbl = max( wrkbl, m + lwork_dorglq_m )
522 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
523 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
524 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
527 minwrk = max( 3*m + n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_dgelqf
533 wrkbl = max( wrkbl, m + lwork_dorglq_m )
534 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
535 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m + n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_dgelqf
544 wrkbl = max( wrkbl, m + lwork_dorglq_m )
545 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
546 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
547 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m + n, bdspac )
551 ELSE IF( wntvs .AND. wntuas ) THEN
552*
553* Path 6t(N much larger than M, JOBU='S' or 'A',
554* JOBVT='S')
555*
556 wrkbl = m + lwork_dgelqf
557 wrkbl = max( wrkbl, m + lwork_dorglq_m )
558 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
559 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
560 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
561 wrkbl = max( wrkbl, bdspac )
562 maxwrk = m*m + wrkbl
563 minwrk = max( 3*m + n, bdspac )
564 ELSE IF( wntva .AND. wntun ) THEN
565*
566* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
567*
568 wrkbl = m + lwork_dgelqf
569 wrkbl = max( wrkbl, m + lwork_dorglq_n )
570 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
571 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
572 wrkbl = max( wrkbl, bdspac )
573 maxwrk = m*m + wrkbl
574 minwrk = max( 3*m + n, bdspac )
575 ELSE IF( wntva .AND. wntuo ) THEN
576*
577* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
578*
579 wrkbl = m + lwork_dgelqf
580 wrkbl = max( wrkbl, m + lwork_dorglq_n )
581 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
582 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
583 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
584 wrkbl = max( wrkbl, bdspac )
585 maxwrk = 2*m*m + wrkbl
586 minwrk = max( 3*m + n, bdspac )
587 ELSE IF( wntva .AND. wntuas ) THEN
588*
589* Path 9t(N much larger than M, JOBU='S' or 'A',
590* JOBVT='A')
591*
592 wrkbl = m + lwork_dgelqf
593 wrkbl = max( wrkbl, m + lwork_dorglq_n )
594 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
595 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
596 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
597 wrkbl = max( wrkbl, bdspac )
598 maxwrk = m*m + wrkbl
599 minwrk = max( 3*m + n, bdspac )
600 END IF
601 ELSE
602*
603* Path 10t(N greater than M, but not much larger)
604*
605 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
606 $ dum(1), dum(1), -1, ierr )
607 lwork_dgebrd = int( dum(1) )
608 maxwrk = 3*m + lwork_dgebrd
609 IF( wntvs .OR. wntvo ) THEN
610* Compute space needed for DORGBR P
611 CALL dorgbr( 'P', m, n, m, a, n, dum(1),
612 $ dum(1), -1, ierr )
613 lwork_dorgbr_p = int( dum(1) )
614 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
615 END IF
616 IF( wntva ) THEN
617 CALL dorgbr( 'P', n, n, m, a, n, dum(1),
618 $ dum(1), -1, ierr )
619 lwork_dorgbr_p = int( dum(1) )
620 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
621 END IF
622 IF( .NOT.wntun ) THEN
623 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
624 END IF
625 maxwrk = max( maxwrk, bdspac )
626 minwrk = max( 3*m + n, bdspac )
627 END IF
628 END IF
629 maxwrk = max( maxwrk, minwrk )
630 work( 1 ) = maxwrk
631*
632 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
633 info = -13
634 END IF
635 END IF
636*
637 IF( info.NE.0 ) THEN
638 CALL xerbla( 'DGESVD', -info )
639 RETURN
640 ELSE IF( lquery ) THEN
641 RETURN
642 END IF
643*
644* Quick return if possible
645*
646 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
647 RETURN
648 END IF
649*
650* Get machine constants
651*
652 eps = dlamch( 'P' )
653 smlnum = sqrt( dlamch( 'S' ) ) / eps
654 bignum = one / smlnum
655*
656* Scale A if max element outside range [SMLNUM,BIGNUM]
657*
658 anrm = dlange( 'M', m, n, a, lda, dum )
659 iscl = 0
660 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
661 iscl = 1
662 CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
663 ELSE IF( anrm.GT.bignum ) THEN
664 iscl = 1
665 CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
666 END IF
667*
668 IF( m.GE.n ) THEN
669*
670* A has at least as many rows as columns. If A has sufficiently
671* more rows than columns, first reduce using the QR
672* decomposition (if sufficient workspace available)
673*
674 IF( m.GE.mnthr ) THEN
675*
676 IF( wntun ) THEN
677*
678* Path 1 (M much larger than N, JOBU='N')
679* No left singular vectors to be computed
680*
681 itau = 1
682 iwork = itau + n
683*
684* Compute A=Q*R
685* (Workspace: need 2*N, prefer N + N*NB)
686*
687 CALL dgeqrf( m, n, a, lda, work( itau ), work( iwork ),
688 $ lwork-iwork+1, ierr )
689*
690* Zero out below R
691*
692 IF( n .GT. 1 ) THEN
693 CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
694 $ lda )
695 END IF
696 ie = 1
697 itauq = ie + n
698 itaup = itauq + n
699 iwork = itaup + n
700*
701* Bidiagonalize R in A
702* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
703*
704 CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
705 $ work( itaup ), work( iwork ), lwork-iwork+1,
706 $ ierr )
707 ncvt = 0
708 IF( wntvo .OR. wntvas ) THEN
709*
710* If right singular vectors desired, generate P'.
711* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
712*
713 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
714 $ work( iwork ), lwork-iwork+1, ierr )
715 ncvt = n
716 END IF
717 iwork = ie + n
718*
719* Perform bidiagonal QR iteration, computing right
720* singular vectors of A in A if desired
721* (Workspace: need BDSPAC)
722*
723 CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
724 $ dum, 1, dum, 1, work( iwork ), info )
725*
726* If right singular vectors desired in VT, copy them there
727*
728 IF( wntvas )
729 $ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
730*
731 ELSE IF( wntuo .AND. wntvn ) THEN
732*
733* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734* N left singular vectors to be overwritten on A and
735* no right singular vectors to be computed
736*
737 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
738*
739* Sufficient workspace for a fast algorithm
740*
741 ir = 1
742 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
743*
744* WORK(IU) is LDA by N, WORK(IR) is LDA by N
745*
746 ldwrku = lda
747 ldwrkr = lda
748 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
749*
750* WORK(IU) is LDA by N, WORK(IR) is N by N
751*
752 ldwrku = lda
753 ldwrkr = n
754 ELSE
755*
756* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757*
758 ldwrku = ( lwork-n*n-n ) / n
759 ldwrkr = n
760 END IF
761 itau = ir + ldwrkr*n
762 iwork = itau + n
763*
764* Compute A=Q*R
765* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
766*
767 CALL dgeqrf( m, n, a, lda, work( itau ),
768 $ work( iwork ), lwork-iwork+1, ierr )
769*
770* Copy R to WORK(IR) and zero out below it
771*
772 CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
773 CALL dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
774 $ ldwrkr )
775*
776* Generate Q in A
777* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
778*
779 CALL dorgqr( m, n, n, a, lda, work( itau ),
780 $ work( iwork ), lwork-iwork+1, ierr )
781 ie = itau
782 itauq = ie + n
783 itaup = itauq + n
784 iwork = itaup + n
785*
786* Bidiagonalize R in WORK(IR)
787* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
788*
789 CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
790 $ work( itauq ), work( itaup ),
791 $ work( iwork ), lwork-iwork+1, ierr )
792*
793* Generate left vectors bidiagonalizing R
794* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
795*
796 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
797 $ work( itauq ), work( iwork ),
798 $ lwork-iwork+1, ierr )
799 iwork = ie + n
800*
801* Perform bidiagonal QR iteration, computing left
802* singular vectors of R in WORK(IR)
803* (Workspace: need N*N + BDSPAC)
804*
805 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
806 $ work( ir ), ldwrkr, dum, 1,
807 $ work( iwork ), info )
808 iu = ie + n
809*
810* Multiply Q in A by left singular vectors of R in
811* WORK(IR), storing result in WORK(IU) and copying to A
812* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
813*
814 DO 10 i = 1, m, ldwrku
815 chunk = min( m-i+1, ldwrku )
816 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
817 $ lda, work( ir ), ldwrkr, zero,
818 $ work( iu ), ldwrku )
819 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
820 $ a( i, 1 ), lda )
821 10 CONTINUE
822*
823 ELSE
824*
825* Insufficient workspace for a fast algorithm
826*
827 ie = 1
828 itauq = ie + n
829 itaup = itauq + n
830 iwork = itaup + n
831*
832* Bidiagonalize A
833* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
834*
835 CALL dgebrd( m, n, a, lda, s, work( ie ),
836 $ work( itauq ), work( itaup ),
837 $ work( iwork ), lwork-iwork+1, ierr )
838*
839* Generate left vectors bidiagonalizing A
840* (Workspace: need 4*N, prefer 3*N + N*NB)
841*
842 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
843 $ work( iwork ), lwork-iwork+1, ierr )
844 iwork = ie + n
845*
846* Perform bidiagonal QR iteration, computing left
847* singular vectors of A in A
848* (Workspace: need BDSPAC)
849*
850 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
851 $ a, lda, dum, 1, work( iwork ), info )
852*
853 END IF
854*
855 ELSE IF( wntuo .AND. wntvas ) THEN
856*
857* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858* N left singular vectors to be overwritten on A and
859* N right singular vectors to be computed in VT
860*
861 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
862*
863* Sufficient workspace for a fast algorithm
864*
865 ir = 1
866 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
867*
868* WORK(IU) is LDA by N and WORK(IR) is LDA by N
869*
870 ldwrku = lda
871 ldwrkr = lda
872 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
873*
874* WORK(IU) is LDA by N and WORK(IR) is N by N
875*
876 ldwrku = lda
877 ldwrkr = n
878 ELSE
879*
880* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881*
882 ldwrku = ( lwork-n*n-n ) / n
883 ldwrkr = n
884 END IF
885 itau = ir + ldwrkr*n
886 iwork = itau + n
887*
888* Compute A=Q*R
889* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
890*
891 CALL dgeqrf( m, n, a, lda, work( itau ),
892 $ work( iwork ), lwork-iwork+1, ierr )
893*
894* Copy R to VT, zeroing out below it
895*
896 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
897 IF( n.GT.1 )
898 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
899 $ vt( 2, 1 ), ldvt )
900*
901* Generate Q in A
902* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
903*
904 CALL dorgqr( m, n, n, a, lda, work( itau ),
905 $ work( iwork ), lwork-iwork+1, ierr )
906 ie = itau
907 itauq = ie + n
908 itaup = itauq + n
909 iwork = itaup + n
910*
911* Bidiagonalize R in VT, copying result to WORK(IR)
912* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
913*
914 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
915 $ work( itauq ), work( itaup ),
916 $ work( iwork ), lwork-iwork+1, ierr )
917 CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
918*
919* Generate left vectors bidiagonalizing R in WORK(IR)
920* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
921*
922 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
923 $ work( itauq ), work( iwork ),
924 $ lwork-iwork+1, ierr )
925*
926* Generate right vectors bidiagonalizing R in VT
927* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
928*
929 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
930 $ work( iwork ), lwork-iwork+1, ierr )
931 iwork = ie + n
932*
933* Perform bidiagonal QR iteration, computing left
934* singular vectors of R in WORK(IR) and computing right
935* singular vectors of R in VT
936* (Workspace: need N*N + BDSPAC)
937*
938 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
939 $ work( ir ), ldwrkr, dum, 1,
940 $ work( iwork ), info )
941 iu = ie + n
942*
943* Multiply Q in A by left singular vectors of R in
944* WORK(IR), storing result in WORK(IU) and copying to A
945* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
946*
947 DO 20 i = 1, m, ldwrku
948 chunk = min( m-i+1, ldwrku )
949 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
950 $ lda, work( ir ), ldwrkr, zero,
951 $ work( iu ), ldwrku )
952 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
953 $ a( i, 1 ), lda )
954 20 CONTINUE
955*
956 ELSE
957*
958* Insufficient workspace for a fast algorithm
959*
960 itau = 1
961 iwork = itau + n
962*
963* Compute A=Q*R
964* (Workspace: need 2*N, prefer N + N*NB)
965*
966 CALL dgeqrf( m, n, a, lda, work( itau ),
967 $ work( iwork ), lwork-iwork+1, ierr )
968*
969* Copy R to VT, zeroing out below it
970*
971 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
972 IF( n.GT.1 )
973 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
974 $ vt( 2, 1 ), ldvt )
975*
976* Generate Q in A
977* (Workspace: need 2*N, prefer N + N*NB)
978*
979 CALL dorgqr( m, n, n, a, lda, work( itau ),
980 $ work( iwork ), lwork-iwork+1, ierr )
981 ie = itau
982 itauq = ie + n
983 itaup = itauq + n
984 iwork = itaup + n
985*
986* Bidiagonalize R in VT
987* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
988*
989 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
990 $ work( itauq ), work( itaup ),
991 $ work( iwork ), lwork-iwork+1, ierr )
992*
993* Multiply Q in A by left vectors bidiagonalizing R
994* (Workspace: need 3*N + M, prefer 3*N + M*NB)
995*
996 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
997 $ work( itauq ), a, lda, work( iwork ),
998 $ lwork-iwork+1, ierr )
999*
1000* Generate right vectors bidiagonalizing R in VT
1001* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1002*
1003 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1004 $ work( iwork ), lwork-iwork+1, ierr )
1005 iwork = ie + n
1006*
1007* Perform bidiagonal QR iteration, computing left
1008* singular vectors of A in A and computing right
1009* singular vectors of A in VT
1010* (Workspace: need BDSPAC)
1011*
1012 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1013 $ a, lda, dum, 1, work( iwork ), info )
1014*
1015 END IF
1016*
1017 ELSE IF( wntus ) THEN
1018*
1019 IF( wntvn ) THEN
1020*
1021* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022* N left singular vectors to be computed in U and
1023* no right singular vectors to be computed
1024*
1025 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1026*
1027* Sufficient workspace for a fast algorithm
1028*
1029 ir = 1
1030 IF( lwork.GE.wrkbl+lda*n ) THEN
1031*
1032* WORK(IR) is LDA by N
1033*
1034 ldwrkr = lda
1035 ELSE
1036*
1037* WORK(IR) is N by N
1038*
1039 ldwrkr = n
1040 END IF
1041 itau = ir + ldwrkr*n
1042 iwork = itau + n
1043*
1044* Compute A=Q*R
1045* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1046*
1047 CALL dgeqrf( m, n, a, lda, work( itau ),
1048 $ work( iwork ), lwork-iwork+1, ierr )
1049*
1050* Copy R to WORK(IR), zeroing out below it
1051*
1052 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1053 $ ldwrkr )
1054 CALL dlaset( 'L', n-1, n-1, zero, zero,
1055 $ work( ir+1 ), ldwrkr )
1056*
1057* Generate Q in A
1058* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1059*
1060 CALL dorgqr( m, n, n, a, lda, work( itau ),
1061 $ work( iwork ), lwork-iwork+1, ierr )
1062 ie = itau
1063 itauq = ie + n
1064 itaup = itauq + n
1065 iwork = itaup + n
1066*
1067* Bidiagonalize R in WORK(IR)
1068* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1069*
1070 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1071 $ work( ie ), work( itauq ),
1072 $ work( itaup ), work( iwork ),
1073 $ lwork-iwork+1, ierr )
1074*
1075* Generate left vectors bidiagonalizing R in WORK(IR)
1076* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1077*
1078 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1079 $ work( itauq ), work( iwork ),
1080 $ lwork-iwork+1, ierr )
1081 iwork = ie + n
1082*
1083* Perform bidiagonal QR iteration, computing left
1084* singular vectors of R in WORK(IR)
1085* (Workspace: need N*N + BDSPAC)
1086*
1087 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1088 $ 1, work( ir ), ldwrkr, dum, 1,
1089 $ work( iwork ), info )
1090*
1091* Multiply Q in A by left singular vectors of R in
1092* WORK(IR), storing result in U
1093* (Workspace: need N*N)
1094*
1095 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1096 $ work( ir ), ldwrkr, zero, u, ldu )
1097*
1098 ELSE
1099*
1100* Insufficient workspace for a fast algorithm
1101*
1102 itau = 1
1103 iwork = itau + n
1104*
1105* Compute A=Q*R, copying result to U
1106* (Workspace: need 2*N, prefer N + N*NB)
1107*
1108 CALL dgeqrf( m, n, a, lda, work( itau ),
1109 $ work( iwork ), lwork-iwork+1, ierr )
1110 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1111*
1112* Generate Q in U
1113* (Workspace: need 2*N, prefer N + N*NB)
1114*
1115 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1116 $ work( iwork ), lwork-iwork+1, ierr )
1117 ie = itau
1118 itauq = ie + n
1119 itaup = itauq + n
1120 iwork = itaup + n
1121*
1122* Zero out below R in A
1123*
1124 IF( n .GT. 1 ) THEN
1125 CALL dlaset( 'L', n-1, n-1, zero, zero,
1126 $ a( 2, 1 ), lda )
1127 END IF
1128*
1129* Bidiagonalize R in A
1130* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1131*
1132 CALL dgebrd( n, n, a, lda, s, work( ie ),
1133 $ work( itauq ), work( itaup ),
1134 $ work( iwork ), lwork-iwork+1, ierr )
1135*
1136* Multiply Q in U by left vectors bidiagonalizing R
1137* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1138*
1139 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1140 $ work( itauq ), u, ldu, work( iwork ),
1141 $ lwork-iwork+1, ierr )
1142 iwork = ie + n
1143*
1144* Perform bidiagonal QR iteration, computing left
1145* singular vectors of A in U
1146* (Workspace: need BDSPAC)
1147*
1148 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1149 $ 1, u, ldu, dum, 1, work( iwork ),
1150 $ info )
1151*
1152 END IF
1153*
1154 ELSE IF( wntvo ) THEN
1155*
1156* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1157* N left singular vectors to be computed in U and
1158* N right singular vectors to be overwritten on A
1159*
1160 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1161*
1162* Sufficient workspace for a fast algorithm
1163*
1164 iu = 1
1165 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1166*
1167* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1168*
1169 ldwrku = lda
1170 ir = iu + ldwrku*n
1171 ldwrkr = lda
1172 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1173*
1174* WORK(IU) is LDA by N and WORK(IR) is N by N
1175*
1176 ldwrku = lda
1177 ir = iu + ldwrku*n
1178 ldwrkr = n
1179 ELSE
1180*
1181* WORK(IU) is N by N and WORK(IR) is N by N
1182*
1183 ldwrku = n
1184 ir = iu + ldwrku*n
1185 ldwrkr = n
1186 END IF
1187 itau = ir + ldwrkr*n
1188 iwork = itau + n
1189*
1190* Compute A=Q*R
1191* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1192*
1193 CALL dgeqrf( m, n, a, lda, work( itau ),
1194 $ work( iwork ), lwork-iwork+1, ierr )
1195*
1196* Copy R to WORK(IU), zeroing out below it
1197*
1198 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1199 $ ldwrku )
1200 CALL dlaset( 'L', n-1, n-1, zero, zero,
1201 $ work( iu+1 ), ldwrku )
1202*
1203* Generate Q in A
1204* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1205*
1206 CALL dorgqr( m, n, n, a, lda, work( itau ),
1207 $ work( iwork ), lwork-iwork+1, ierr )
1208 ie = itau
1209 itauq = ie + n
1210 itaup = itauq + n
1211 iwork = itaup + n
1212*
1213* Bidiagonalize R in WORK(IU), copying result to
1214* WORK(IR)
1215* (Workspace: need 2*N*N + 4*N,
1216* prefer 2*N*N+3*N+2*N*NB)
1217*
1218 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1219 $ work( ie ), work( itauq ),
1220 $ work( itaup ), work( iwork ),
1221 $ lwork-iwork+1, ierr )
1222 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1223 $ work( ir ), ldwrkr )
1224*
1225* Generate left bidiagonalizing vectors in WORK(IU)
1226* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1227*
1228 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1229 $ work( itauq ), work( iwork ),
1230 $ lwork-iwork+1, ierr )
1231*
1232* Generate right bidiagonalizing vectors in WORK(IR)
1233* (Workspace: need 2*N*N + 4*N-1,
1234* prefer 2*N*N+3*N+(N-1)*NB)
1235*
1236 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1237 $ work( itaup ), work( iwork ),
1238 $ lwork-iwork+1, ierr )
1239 iwork = ie + n
1240*
1241* Perform bidiagonal QR iteration, computing left
1242* singular vectors of R in WORK(IU) and computing
1243* right singular vectors of R in WORK(IR)
1244* (Workspace: need 2*N*N + BDSPAC)
1245*
1246 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1247 $ work( ir ), ldwrkr, work( iu ),
1248 $ ldwrku, dum, 1, work( iwork ), info )
1249*
1250* Multiply Q in A by left singular vectors of R in
1251* WORK(IU), storing result in U
1252* (Workspace: need N*N)
1253*
1254 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1255 $ work( iu ), ldwrku, zero, u, ldu )
1256*
1257* Copy right singular vectors of R to A
1258* (Workspace: need N*N)
1259*
1260 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1261 $ lda )
1262*
1263 ELSE
1264*
1265* Insufficient workspace for a fast algorithm
1266*
1267 itau = 1
1268 iwork = itau + n
1269*
1270* Compute A=Q*R, copying result to U
1271* (Workspace: need 2*N, prefer N + N*NB)
1272*
1273 CALL dgeqrf( m, n, a, lda, work( itau ),
1274 $ work( iwork ), lwork-iwork+1, ierr )
1275 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1276*
1277* Generate Q in U
1278* (Workspace: need 2*N, prefer N + N*NB)
1279*
1280 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1281 $ work( iwork ), lwork-iwork+1, ierr )
1282 ie = itau
1283 itauq = ie + n
1284 itaup = itauq + n
1285 iwork = itaup + n
1286*
1287* Zero out below R in A
1288*
1289 IF( n .GT. 1 ) THEN
1290 CALL dlaset( 'L', n-1, n-1, zero, zero,
1291 $ a( 2, 1 ), lda )
1292 END IF
1293*
1294* Bidiagonalize R in A
1295* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1296*
1297 CALL dgebrd( n, n, a, lda, s, work( ie ),
1298 $ work( itauq ), work( itaup ),
1299 $ work( iwork ), lwork-iwork+1, ierr )
1300*
1301* Multiply Q in U by left vectors bidiagonalizing R
1302* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1303*
1304 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1305 $ work( itauq ), u, ldu, work( iwork ),
1306 $ lwork-iwork+1, ierr )
1307*
1308* Generate right vectors bidiagonalizing R in A
1309* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1310*
1311 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1312 $ work( iwork ), lwork-iwork+1, ierr )
1313 iwork = ie + n
1314*
1315* Perform bidiagonal QR iteration, computing left
1316* singular vectors of A in U and computing right
1317* singular vectors of A in A
1318* (Workspace: need BDSPAC)
1319*
1320 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1321 $ lda, u, ldu, dum, 1, work( iwork ),
1322 $ info )
1323*
1324 END IF
1325*
1326 ELSE IF( wntvas ) THEN
1327*
1328* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1329* or 'A')
1330* N left singular vectors to be computed in U and
1331* N right singular vectors to be computed in VT
1332*
1333 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1334*
1335* Sufficient workspace for a fast algorithm
1336*
1337 iu = 1
1338 IF( lwork.GE.wrkbl+lda*n ) THEN
1339*
1340* WORK(IU) is LDA by N
1341*
1342 ldwrku = lda
1343 ELSE
1344*
1345* WORK(IU) is N by N
1346*
1347 ldwrku = n
1348 END IF
1349 itau = iu + ldwrku*n
1350 iwork = itau + n
1351*
1352* Compute A=Q*R
1353* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1354*
1355 CALL dgeqrf( m, n, a, lda, work( itau ),
1356 $ work( iwork ), lwork-iwork+1, ierr )
1357*
1358* Copy R to WORK(IU), zeroing out below it
1359*
1360 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1361 $ ldwrku )
1362 CALL dlaset( 'L', n-1, n-1, zero, zero,
1363 $ work( iu+1 ), ldwrku )
1364*
1365* Generate Q in A
1366* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1367*
1368 CALL dorgqr( m, n, n, a, lda, work( itau ),
1369 $ work( iwork ), lwork-iwork+1, ierr )
1370 ie = itau
1371 itauq = ie + n
1372 itaup = itauq + n
1373 iwork = itaup + n
1374*
1375* Bidiagonalize R in WORK(IU), copying result to VT
1376* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1377*
1378 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1379 $ work( ie ), work( itauq ),
1380 $ work( itaup ), work( iwork ),
1381 $ lwork-iwork+1, ierr )
1382 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1383 $ ldvt )
1384*
1385* Generate left bidiagonalizing vectors in WORK(IU)
1386* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1387*
1388 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1389 $ work( itauq ), work( iwork ),
1390 $ lwork-iwork+1, ierr )
1391*
1392* Generate right bidiagonalizing vectors in VT
1393* (Workspace: need N*N + 4*N-1,
1394* prefer N*N+3*N+(N-1)*NB)
1395*
1396 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1397 $ work( iwork ), lwork-iwork+1, ierr )
1398 iwork = ie + n
1399*
1400* Perform bidiagonal QR iteration, computing left
1401* singular vectors of R in WORK(IU) and computing
1402* right singular vectors of R in VT
1403* (Workspace: need N*N + BDSPAC)
1404*
1405 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1406 $ ldvt, work( iu ), ldwrku, dum, 1,
1407 $ work( iwork ), info )
1408*
1409* Multiply Q in A by left singular vectors of R in
1410* WORK(IU), storing result in U
1411* (Workspace: need N*N)
1412*
1413 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1414 $ work( iu ), ldwrku, zero, u, ldu )
1415*
1416 ELSE
1417*
1418* Insufficient workspace for a fast algorithm
1419*
1420 itau = 1
1421 iwork = itau + n
1422*
1423* Compute A=Q*R, copying result to U
1424* (Workspace: need 2*N, prefer N + N*NB)
1425*
1426 CALL dgeqrf( m, n, a, lda, work( itau ),
1427 $ work( iwork ), lwork-iwork+1, ierr )
1428 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1429*
1430* Generate Q in U
1431* (Workspace: need 2*N, prefer N + N*NB)
1432*
1433 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1434 $ work( iwork ), lwork-iwork+1, ierr )
1435*
1436* Copy R to VT, zeroing out below it
1437*
1438 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1439 IF( n.GT.1 )
1440 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1441 $ vt( 2, 1 ), ldvt )
1442 ie = itau
1443 itauq = ie + n
1444 itaup = itauq + n
1445 iwork = itaup + n
1446*
1447* Bidiagonalize R in VT
1448* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1449*
1450 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1451 $ work( itauq ), work( itaup ),
1452 $ work( iwork ), lwork-iwork+1, ierr )
1453*
1454* Multiply Q in U by left bidiagonalizing vectors
1455* in VT
1456* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1457*
1458 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1459 $ work( itauq ), u, ldu, work( iwork ),
1460 $ lwork-iwork+1, ierr )
1461*
1462* Generate right bidiagonalizing vectors in VT
1463* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1464*
1465 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1466 $ work( iwork ), lwork-iwork+1, ierr )
1467 iwork = ie + n
1468*
1469* Perform bidiagonal QR iteration, computing left
1470* singular vectors of A in U and computing right
1471* singular vectors of A in VT
1472* (Workspace: need BDSPAC)
1473*
1474 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1475 $ ldvt, u, ldu, dum, 1, work( iwork ),
1476 $ info )
1477*
1478 END IF
1479*
1480 END IF
1481*
1482 ELSE IF( wntua ) THEN
1483*
1484 IF( wntvn ) THEN
1485*
1486* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1487* M left singular vectors to be computed in U and
1488* no right singular vectors to be computed
1489*
1490 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1491*
1492* Sufficient workspace for a fast algorithm
1493*
1494 ir = 1
1495 IF( lwork.GE.wrkbl+lda*n ) THEN
1496*
1497* WORK(IR) is LDA by N
1498*
1499 ldwrkr = lda
1500 ELSE
1501*
1502* WORK(IR) is N by N
1503*
1504 ldwrkr = n
1505 END IF
1506 itau = ir + ldwrkr*n
1507 iwork = itau + n
1508*
1509* Compute A=Q*R, copying result to U
1510* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1511*
1512 CALL dgeqrf( m, n, a, lda, work( itau ),
1513 $ work( iwork ), lwork-iwork+1, ierr )
1514 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1515*
1516* Copy R to WORK(IR), zeroing out below it
1517*
1518 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1519 $ ldwrkr )
1520 CALL dlaset( 'L', n-1, n-1, zero, zero,
1521 $ work( ir+1 ), ldwrkr )
1522*
1523* Generate Q in U
1524* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1525*
1526 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1527 $ work( iwork ), lwork-iwork+1, ierr )
1528 ie = itau
1529 itauq = ie + n
1530 itaup = itauq + n
1531 iwork = itaup + n
1532*
1533* Bidiagonalize R in WORK(IR)
1534* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1535*
1536 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1537 $ work( ie ), work( itauq ),
1538 $ work( itaup ), work( iwork ),
1539 $ lwork-iwork+1, ierr )
1540*
1541* Generate left bidiagonalizing vectors in WORK(IR)
1542* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1543*
1544 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1545 $ work( itauq ), work( iwork ),
1546 $ lwork-iwork+1, ierr )
1547 iwork = ie + n
1548*
1549* Perform bidiagonal QR iteration, computing left
1550* singular vectors of R in WORK(IR)
1551* (Workspace: need N*N + BDSPAC)
1552*
1553 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1554 $ 1, work( ir ), ldwrkr, dum, 1,
1555 $ work( iwork ), info )
1556*
1557* Multiply Q in U by left singular vectors of R in
1558* WORK(IR), storing result in A
1559* (Workspace: need N*N)
1560*
1561 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1562 $ work( ir ), ldwrkr, zero, a, lda )
1563*
1564* Copy left singular vectors of A from A to U
1565*
1566 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1567*
1568 ELSE
1569*
1570* Insufficient workspace for a fast algorithm
1571*
1572 itau = 1
1573 iwork = itau + n
1574*
1575* Compute A=Q*R, copying result to U
1576* (Workspace: need 2*N, prefer N + N*NB)
1577*
1578 CALL dgeqrf( m, n, a, lda, work( itau ),
1579 $ work( iwork ), lwork-iwork+1, ierr )
1580 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1581*
1582* Generate Q in U
1583* (Workspace: need N + M, prefer N + M*NB)
1584*
1585 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1586 $ work( iwork ), lwork-iwork+1, ierr )
1587 ie = itau
1588 itauq = ie + n
1589 itaup = itauq + n
1590 iwork = itaup + n
1591*
1592* Zero out below R in A
1593*
1594 IF( n .GT. 1 ) THEN
1595 CALL dlaset( 'L', n-1, n-1, zero, zero,
1596 $ a( 2, 1 ), lda )
1597 END IF
1598*
1599* Bidiagonalize R in A
1600* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1601*
1602 CALL dgebrd( n, n, a, lda, s, work( ie ),
1603 $ work( itauq ), work( itaup ),
1604 $ work( iwork ), lwork-iwork+1, ierr )
1605*
1606* Multiply Q in U by left bidiagonalizing vectors
1607* in A
1608* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1609*
1610 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1611 $ work( itauq ), u, ldu, work( iwork ),
1612 $ lwork-iwork+1, ierr )
1613 iwork = ie + n
1614*
1615* Perform bidiagonal QR iteration, computing left
1616* singular vectors of A in U
1617* (Workspace: need BDSPAC)
1618*
1619 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1620 $ 1, u, ldu, dum, 1, work( iwork ),
1621 $ info )
1622*
1623 END IF
1624*
1625 ELSE IF( wntvo ) THEN
1626*
1627* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1628* M left singular vectors to be computed in U and
1629* N right singular vectors to be overwritten on A
1630*
1631 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1632*
1633* Sufficient workspace for a fast algorithm
1634*
1635 iu = 1
1636 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1637*
1638* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1639*
1640 ldwrku = lda
1641 ir = iu + ldwrku*n
1642 ldwrkr = lda
1643 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1644*
1645* WORK(IU) is LDA by N and WORK(IR) is N by N
1646*
1647 ldwrku = lda
1648 ir = iu + ldwrku*n
1649 ldwrkr = n
1650 ELSE
1651*
1652* WORK(IU) is N by N and WORK(IR) is N by N
1653*
1654 ldwrku = n
1655 ir = iu + ldwrku*n
1656 ldwrkr = n
1657 END IF
1658 itau = ir + ldwrkr*n
1659 iwork = itau + n
1660*
1661* Compute A=Q*R, copying result to U
1662* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1663*
1664 CALL dgeqrf( m, n, a, lda, work( itau ),
1665 $ work( iwork ), lwork-iwork+1, ierr )
1666 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1667*
1668* Generate Q in U
1669* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1670*
1671 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1672 $ work( iwork ), lwork-iwork+1, ierr )
1673*
1674* Copy R to WORK(IU), zeroing out below it
1675*
1676 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1677 $ ldwrku )
1678 CALL dlaset( 'L', n-1, n-1, zero, zero,
1679 $ work( iu+1 ), ldwrku )
1680 ie = itau
1681 itauq = ie + n
1682 itaup = itauq + n
1683 iwork = itaup + n
1684*
1685* Bidiagonalize R in WORK(IU), copying result to
1686* WORK(IR)
1687* (Workspace: need 2*N*N + 4*N,
1688* prefer 2*N*N+3*N+2*N*NB)
1689*
1690 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1691 $ work( ie ), work( itauq ),
1692 $ work( itaup ), work( iwork ),
1693 $ lwork-iwork+1, ierr )
1694 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1695 $ work( ir ), ldwrkr )
1696*
1697* Generate left bidiagonalizing vectors in WORK(IU)
1698* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1699*
1700 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1701 $ work( itauq ), work( iwork ),
1702 $ lwork-iwork+1, ierr )
1703*
1704* Generate right bidiagonalizing vectors in WORK(IR)
1705* (Workspace: need 2*N*N + 4*N-1,
1706* prefer 2*N*N+3*N+(N-1)*NB)
1707*
1708 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1709 $ work( itaup ), work( iwork ),
1710 $ lwork-iwork+1, ierr )
1711 iwork = ie + n
1712*
1713* Perform bidiagonal QR iteration, computing left
1714* singular vectors of R in WORK(IU) and computing
1715* right singular vectors of R in WORK(IR)
1716* (Workspace: need 2*N*N + BDSPAC)
1717*
1718 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1719 $ work( ir ), ldwrkr, work( iu ),
1720 $ ldwrku, dum, 1, work( iwork ), info )
1721*
1722* Multiply Q in U by left singular vectors of R in
1723* WORK(IU), storing result in A
1724* (Workspace: need N*N)
1725*
1726 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1727 $ work( iu ), ldwrku, zero, a, lda )
1728*
1729* Copy left singular vectors of A from A to U
1730*
1731 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1732*
1733* Copy right singular vectors of R from WORK(IR) to A
1734*
1735 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1736 $ lda )
1737*
1738 ELSE
1739*
1740* Insufficient workspace for a fast algorithm
1741*
1742 itau = 1
1743 iwork = itau + n
1744*
1745* Compute A=Q*R, copying result to U
1746* (Workspace: need 2*N, prefer N + N*NB)
1747*
1748 CALL dgeqrf( m, n, a, lda, work( itau ),
1749 $ work( iwork ), lwork-iwork+1, ierr )
1750 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1751*
1752* Generate Q in U
1753* (Workspace: need N + M, prefer N + M*NB)
1754*
1755 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1756 $ work( iwork ), lwork-iwork+1, ierr )
1757 ie = itau
1758 itauq = ie + n
1759 itaup = itauq + n
1760 iwork = itaup + n
1761*
1762* Zero out below R in A
1763*
1764 IF( n .GT. 1 ) THEN
1765 CALL dlaset( 'L', n-1, n-1, zero, zero,
1766 $ a( 2, 1 ), lda )
1767 END IF
1768*
1769* Bidiagonalize R in A
1770* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1771*
1772 CALL dgebrd( n, n, a, lda, s, work( ie ),
1773 $ work( itauq ), work( itaup ),
1774 $ work( iwork ), lwork-iwork+1, ierr )
1775*
1776* Multiply Q in U by left bidiagonalizing vectors
1777* in A
1778* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1779*
1780 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1781 $ work( itauq ), u, ldu, work( iwork ),
1782 $ lwork-iwork+1, ierr )
1783*
1784* Generate right bidiagonalizing vectors in A
1785* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1786*
1787 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1788 $ work( iwork ), lwork-iwork+1, ierr )
1789 iwork = ie + n
1790*
1791* Perform bidiagonal QR iteration, computing left
1792* singular vectors of A in U and computing right
1793* singular vectors of A in A
1794* (Workspace: need BDSPAC)
1795*
1796 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1797 $ lda, u, ldu, dum, 1, work( iwork ),
1798 $ info )
1799*
1800 END IF
1801*
1802 ELSE IF( wntvas ) THEN
1803*
1804* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1805* or 'A')
1806* M left singular vectors to be computed in U and
1807* N right singular vectors to be computed in VT
1808*
1809 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1810*
1811* Sufficient workspace for a fast algorithm
1812*
1813 iu = 1
1814 IF( lwork.GE.wrkbl+lda*n ) THEN
1815*
1816* WORK(IU) is LDA by N
1817*
1818 ldwrku = lda
1819 ELSE
1820*
1821* WORK(IU) is N by N
1822*
1823 ldwrku = n
1824 END IF
1825 itau = iu + ldwrku*n
1826 iwork = itau + n
1827*
1828* Compute A=Q*R, copying result to U
1829* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1830*
1831 CALL dgeqrf( m, n, a, lda, work( itau ),
1832 $ work( iwork ), lwork-iwork+1, ierr )
1833 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1834*
1835* Generate Q in U
1836* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1837*
1838 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1839 $ work( iwork ), lwork-iwork+1, ierr )
1840*
1841* Copy R to WORK(IU), zeroing out below it
1842*
1843 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1844 $ ldwrku )
1845 CALL dlaset( 'L', n-1, n-1, zero, zero,
1846 $ work( iu+1 ), ldwrku )
1847 ie = itau
1848 itauq = ie + n
1849 itaup = itauq + n
1850 iwork = itaup + n
1851*
1852* Bidiagonalize R in WORK(IU), copying result to VT
1853* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1854*
1855 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1856 $ work( ie ), work( itauq ),
1857 $ work( itaup ), work( iwork ),
1858 $ lwork-iwork+1, ierr )
1859 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1860 $ ldvt )
1861*
1862* Generate left bidiagonalizing vectors in WORK(IU)
1863* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1864*
1865 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1866 $ work( itauq ), work( iwork ),
1867 $ lwork-iwork+1, ierr )
1868*
1869* Generate right bidiagonalizing vectors in VT
1870* (Workspace: need N*N + 4*N-1,
1871* prefer N*N+3*N+(N-1)*NB)
1872*
1873 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1874 $ work( iwork ), lwork-iwork+1, ierr )
1875 iwork = ie + n
1876*
1877* Perform bidiagonal QR iteration, computing left
1878* singular vectors of R in WORK(IU) and computing
1879* right singular vectors of R in VT
1880* (Workspace: need N*N + BDSPAC)
1881*
1882 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1883 $ ldvt, work( iu ), ldwrku, dum, 1,
1884 $ work( iwork ), info )
1885*
1886* Multiply Q in U by left singular vectors of R in
1887* WORK(IU), storing result in A
1888* (Workspace: need N*N)
1889*
1890 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1891 $ work( iu ), ldwrku, zero, a, lda )
1892*
1893* Copy left singular vectors of A from A to U
1894*
1895 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1896*
1897 ELSE
1898*
1899* Insufficient workspace for a fast algorithm
1900*
1901 itau = 1
1902 iwork = itau + n
1903*
1904* Compute A=Q*R, copying result to U
1905* (Workspace: need 2*N, prefer N + N*NB)
1906*
1907 CALL dgeqrf( m, n, a, lda, work( itau ),
1908 $ work( iwork ), lwork-iwork+1, ierr )
1909 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1910*
1911* Generate Q in U
1912* (Workspace: need N + M, prefer N + M*NB)
1913*
1914 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1915 $ work( iwork ), lwork-iwork+1, ierr )
1916*
1917* Copy R from A to VT, zeroing out below it
1918*
1919 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1920 IF( n.GT.1 )
1921 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1922 $ vt( 2, 1 ), ldvt )
1923 ie = itau
1924 itauq = ie + n
1925 itaup = itauq + n
1926 iwork = itaup + n
1927*
1928* Bidiagonalize R in VT
1929* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1930*
1931 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1932 $ work( itauq ), work( itaup ),
1933 $ work( iwork ), lwork-iwork+1, ierr )
1934*
1935* Multiply Q in U by left bidiagonalizing vectors
1936* in VT
1937* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1938*
1939 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1940 $ work( itauq ), u, ldu, work( iwork ),
1941 $ lwork-iwork+1, ierr )
1942*
1943* Generate right bidiagonalizing vectors in VT
1944* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1945*
1946 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1947 $ work( iwork ), lwork-iwork+1, ierr )
1948 iwork = ie + n
1949*
1950* Perform bidiagonal QR iteration, computing left
1951* singular vectors of A in U and computing right
1952* singular vectors of A in VT
1953* (Workspace: need BDSPAC)
1954*
1955 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1956 $ ldvt, u, ldu, dum, 1, work( iwork ),
1957 $ info )
1958*
1959 END IF
1960*
1961 END IF
1962*
1963 END IF
1964*
1965 ELSE
1966*
1967* M .LT. MNTHR
1968*
1969* Path 10 (M at least N, but not much larger)
1970* Reduce to bidiagonal form without QR decomposition
1971*
1972 ie = 1
1973 itauq = ie + n
1974 itaup = itauq + n
1975 iwork = itaup + n
1976*
1977* Bidiagonalize A
1978* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1979*
1980 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1981 $ work( itaup ), work( iwork ), lwork-iwork+1,
1982 $ ierr )
1983 IF( wntuas ) THEN
1984*
1985* If left singular vectors desired in U, copy result to U
1986* and generate left bidiagonalizing vectors in U
1987* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1988*
1989 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1990 IF( wntus )
1991 $ ncu = n
1992 IF( wntua )
1993 $ ncu = m
1994 CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1995 $ work( iwork ), lwork-iwork+1, ierr )
1996 END IF
1997 IF( wntvas ) THEN
1998*
1999* If right singular vectors desired in VT, copy result to
2000* VT and generate right bidiagonalizing vectors in VT
2001* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2002*
2003 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
2004 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2005 $ work( iwork ), lwork-iwork+1, ierr )
2006 END IF
2007 IF( wntuo ) THEN
2008*
2009* If left singular vectors desired in A, generate left
2010* bidiagonalizing vectors in A
2011* (Workspace: need 4*N, prefer 3*N + N*NB)
2012*
2013 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2014 $ work( iwork ), lwork-iwork+1, ierr )
2015 END IF
2016 IF( wntvo ) THEN
2017*
2018* If right singular vectors desired in A, generate right
2019* bidiagonalizing vectors in A
2020* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2021*
2022 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2023 $ work( iwork ), lwork-iwork+1, ierr )
2024 END IF
2025 iwork = ie + n
2026 IF( wntuas .OR. wntuo )
2027 $ nru = m
2028 IF( wntun )
2029 $ nru = 0
2030 IF( wntvas .OR. wntvo )
2031 $ ncvt = n
2032 IF( wntvn )
2033 $ ncvt = 0
2034 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2035*
2036* Perform bidiagonal QR iteration, if desired, computing
2037* left singular vectors in U and computing right singular
2038* vectors in VT
2039* (Workspace: need BDSPAC)
2040*
2041 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2042 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2043 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2044*
2045* Perform bidiagonal QR iteration, if desired, computing
2046* left singular vectors in U and computing right singular
2047* vectors in A
2048* (Workspace: need BDSPAC)
2049*
2050 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2051 $ u, ldu, dum, 1, work( iwork ), info )
2052 ELSE
2053*
2054* Perform bidiagonal QR iteration, if desired, computing
2055* left singular vectors in A and computing right singular
2056* vectors in VT
2057* (Workspace: need BDSPAC)
2058*
2059 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2060 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2061 END IF
2062*
2063 END IF
2064*
2065 ELSE
2066*
2067* A has more columns than rows. If A has sufficiently more
2068* columns than rows, first reduce using the LQ decomposition (if
2069* sufficient workspace available)
2070*
2071 IF( n.GE.mnthr ) THEN
2072*
2073 IF( wntvn ) THEN
2074*
2075* Path 1t(N much larger than M, JOBVT='N')
2076* No right singular vectors to be computed
2077*
2078 itau = 1
2079 iwork = itau + m
2080*
2081* Compute A=L*Q
2082* (Workspace: need 2*M, prefer M + M*NB)
2083*
2084 CALL dgelqf( m, n, a, lda, work( itau ), work( iwork ),
2085 $ lwork-iwork+1, ierr )
2086*
2087* Zero out above L
2088*
2089 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2090 ie = 1
2091 itauq = ie + m
2092 itaup = itauq + m
2093 iwork = itaup + m
2094*
2095* Bidiagonalize L in A
2096* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2097*
2098 CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2099 $ work( itaup ), work( iwork ), lwork-iwork+1,
2100 $ ierr )
2101 IF( wntuo .OR. wntuas ) THEN
2102*
2103* If left singular vectors desired, generate Q
2104* (Workspace: need 4*M, prefer 3*M + M*NB)
2105*
2106 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2107 $ work( iwork ), lwork-iwork+1, ierr )
2108 END IF
2109 iwork = ie + m
2110 nru = 0
2111 IF( wntuo .OR. wntuas )
2112 $ nru = m
2113*
2114* Perform bidiagonal QR iteration, computing left singular
2115* vectors of A in A if desired
2116* (Workspace: need BDSPAC)
2117*
2118 CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2119 $ lda, dum, 1, work( iwork ), info )
2120*
2121* If left singular vectors desired in U, copy them there
2122*
2123 IF( wntuas )
2124 $ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2125*
2126 ELSE IF( wntvo .AND. wntun ) THEN
2127*
2128* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2129* M right singular vectors to be overwritten on A and
2130* no left singular vectors to be computed
2131*
2132 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2133*
2134* Sufficient workspace for a fast algorithm
2135*
2136 ir = 1
2137 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2138*
2139* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2140*
2141 ldwrku = lda
2142 chunk = n
2143 ldwrkr = lda
2144 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2145*
2146* WORK(IU) is LDA by N and WORK(IR) is M by M
2147*
2148 ldwrku = lda
2149 chunk = n
2150 ldwrkr = m
2151 ELSE
2152*
2153* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2154*
2155 ldwrku = m
2156 chunk = ( lwork-m*m-m ) / m
2157 ldwrkr = m
2158 END IF
2159 itau = ir + ldwrkr*m
2160 iwork = itau + m
2161*
2162* Compute A=L*Q
2163* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2164*
2165 CALL dgelqf( m, n, a, lda, work( itau ),
2166 $ work( iwork ), lwork-iwork+1, ierr )
2167*
2168* Copy L to WORK(IR) and zero out above it
2169*
2170 CALL dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2171 CALL dlaset( 'U', m-1, m-1, zero, zero,
2172 $ work( ir+ldwrkr ), ldwrkr )
2173*
2174* Generate Q in A
2175* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2176*
2177 CALL dorglq( m, n, m, a, lda, work( itau ),
2178 $ work( iwork ), lwork-iwork+1, ierr )
2179 ie = itau
2180 itauq = ie + m
2181 itaup = itauq + m
2182 iwork = itaup + m
2183*
2184* Bidiagonalize L in WORK(IR)
2185* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2186*
2187 CALL dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2188 $ work( itauq ), work( itaup ),
2189 $ work( iwork ), lwork-iwork+1, ierr )
2190*
2191* Generate right vectors bidiagonalizing L
2192* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2193*
2194 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2195 $ work( itaup ), work( iwork ),
2196 $ lwork-iwork+1, ierr )
2197 iwork = ie + m
2198*
2199* Perform bidiagonal QR iteration, computing right
2200* singular vectors of L in WORK(IR)
2201* (Workspace: need M*M + BDSPAC)
2202*
2203 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2204 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2205 $ work( iwork ), info )
2206 iu = ie + m
2207*
2208* Multiply right singular vectors of L in WORK(IR) by Q
2209* in A, storing result in WORK(IU) and copying to A
2210* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2211*
2212 DO 30 i = 1, n, chunk
2213 blk = min( n-i+1, chunk )
2214 CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2215 $ ldwrkr, a( 1, i ), lda, zero,
2216 $ work( iu ), ldwrku )
2217 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2218 $ a( 1, i ), lda )
2219 30 CONTINUE
2220*
2221 ELSE
2222*
2223* Insufficient workspace for a fast algorithm
2224*
2225 ie = 1
2226 itauq = ie + m
2227 itaup = itauq + m
2228 iwork = itaup + m
2229*
2230* Bidiagonalize A
2231* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2232*
2233 CALL dgebrd( m, n, a, lda, s, work( ie ),
2234 $ work( itauq ), work( itaup ),
2235 $ work( iwork ), lwork-iwork+1, ierr )
2236*
2237* Generate right vectors bidiagonalizing A
2238* (Workspace: need 4*M, prefer 3*M + M*NB)
2239*
2240 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2241 $ work( iwork ), lwork-iwork+1, ierr )
2242 iwork = ie + m
2243*
2244* Perform bidiagonal QR iteration, computing right
2245* singular vectors of A in A
2246* (Workspace: need BDSPAC)
2247*
2248 CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2249 $ dum, 1, dum, 1, work( iwork ), info )
2250*
2251 END IF
2252*
2253 ELSE IF( wntvo .AND. wntuas ) THEN
2254*
2255* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2256* M right singular vectors to be overwritten on A and
2257* M left singular vectors to be computed in U
2258*
2259 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2260*
2261* Sufficient workspace for a fast algorithm
2262*
2263 ir = 1
2264 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2265*
2266* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2267*
2268 ldwrku = lda
2269 chunk = n
2270 ldwrkr = lda
2271 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2272*
2273* WORK(IU) is LDA by N and WORK(IR) is M by M
2274*
2275 ldwrku = lda
2276 chunk = n
2277 ldwrkr = m
2278 ELSE
2279*
2280* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2281*
2282 ldwrku = m
2283 chunk = ( lwork-m*m-m ) / m
2284 ldwrkr = m
2285 END IF
2286 itau = ir + ldwrkr*m
2287 iwork = itau + m
2288*
2289* Compute A=L*Q
2290* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2291*
2292 CALL dgelqf( m, n, a, lda, work( itau ),
2293 $ work( iwork ), lwork-iwork+1, ierr )
2294*
2295* Copy L to U, zeroing about above it
2296*
2297 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2298 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2299 $ ldu )
2300*
2301* Generate Q in A
2302* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2303*
2304 CALL dorglq( m, n, m, a, lda, work( itau ),
2305 $ work( iwork ), lwork-iwork+1, ierr )
2306 ie = itau
2307 itauq = ie + m
2308 itaup = itauq + m
2309 iwork = itaup + m
2310*
2311* Bidiagonalize L in U, copying result to WORK(IR)
2312* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2313*
2314 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2315 $ work( itauq ), work( itaup ),
2316 $ work( iwork ), lwork-iwork+1, ierr )
2317 CALL dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2318*
2319* Generate right vectors bidiagonalizing L in WORK(IR)
2320* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2321*
2322 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2323 $ work( itaup ), work( iwork ),
2324 $ lwork-iwork+1, ierr )
2325*
2326* Generate left vectors bidiagonalizing L in U
2327* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2328*
2329 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2330 $ work( iwork ), lwork-iwork+1, ierr )
2331 iwork = ie + m
2332*
2333* Perform bidiagonal QR iteration, computing left
2334* singular vectors of L in U, and computing right
2335* singular vectors of L in WORK(IR)
2336* (Workspace: need M*M + BDSPAC)
2337*
2338 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2339 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2340 $ work( iwork ), info )
2341 iu = ie + m
2342*
2343* Multiply right singular vectors of L in WORK(IR) by Q
2344* in A, storing result in WORK(IU) and copying to A
2345* (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2346*
2347 DO 40 i = 1, n, chunk
2348 blk = min( n-i+1, chunk )
2349 CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2350 $ ldwrkr, a( 1, i ), lda, zero,
2351 $ work( iu ), ldwrku )
2352 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2353 $ a( 1, i ), lda )
2354 40 CONTINUE
2355*
2356 ELSE
2357*
2358* Insufficient workspace for a fast algorithm
2359*
2360 itau = 1
2361 iwork = itau + m
2362*
2363* Compute A=L*Q
2364* (Workspace: need 2*M, prefer M + M*NB)
2365*
2366 CALL dgelqf( m, n, a, lda, work( itau ),
2367 $ work( iwork ), lwork-iwork+1, ierr )
2368*
2369* Copy L to U, zeroing out above it
2370*
2371 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2372 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2373 $ ldu )
2374*
2375* Generate Q in A
2376* (Workspace: need 2*M, prefer M + M*NB)
2377*
2378 CALL dorglq( m, n, m, a, lda, work( itau ),
2379 $ work( iwork ), lwork-iwork+1, ierr )
2380 ie = itau
2381 itauq = ie + m
2382 itaup = itauq + m
2383 iwork = itaup + m
2384*
2385* Bidiagonalize L in U
2386* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2387*
2388 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2389 $ work( itauq ), work( itaup ),
2390 $ work( iwork ), lwork-iwork+1, ierr )
2391*
2392* Multiply right vectors bidiagonalizing L by Q in A
2393* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2394*
2395 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2396 $ work( itaup ), a, lda, work( iwork ),
2397 $ lwork-iwork+1, ierr )
2398*
2399* Generate left vectors bidiagonalizing L in U
2400* (Workspace: need 4*M, prefer 3*M + M*NB)
2401*
2402 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2403 $ work( iwork ), lwork-iwork+1, ierr )
2404 iwork = ie + m
2405*
2406* Perform bidiagonal QR iteration, computing left
2407* singular vectors of A in U and computing right
2408* singular vectors of A in A
2409* (Workspace: need BDSPAC)
2410*
2411 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2412 $ u, ldu, dum, 1, work( iwork ), info )
2413*
2414 END IF
2415*
2416 ELSE IF( wntvs ) THEN
2417*
2418 IF( wntun ) THEN
2419*
2420* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2421* M right singular vectors to be computed in VT and
2422* no left singular vectors to be computed
2423*
2424 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2425*
2426* Sufficient workspace for a fast algorithm
2427*
2428 ir = 1
2429 IF( lwork.GE.wrkbl+lda*m ) THEN
2430*
2431* WORK(IR) is LDA by M
2432*
2433 ldwrkr = lda
2434 ELSE
2435*
2436* WORK(IR) is M by M
2437*
2438 ldwrkr = m
2439 END IF
2440 itau = ir + ldwrkr*m
2441 iwork = itau + m
2442*
2443* Compute A=L*Q
2444* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2445*
2446 CALL dgelqf( m, n, a, lda, work( itau ),
2447 $ work( iwork ), lwork-iwork+1, ierr )
2448*
2449* Copy L to WORK(IR), zeroing out above it
2450*
2451 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2452 $ ldwrkr )
2453 CALL dlaset( 'U', m-1, m-1, zero, zero,
2454 $ work( ir+ldwrkr ), ldwrkr )
2455*
2456* Generate Q in A
2457* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2458*
2459 CALL dorglq( m, n, m, a, lda, work( itau ),
2460 $ work( iwork ), lwork-iwork+1, ierr )
2461 ie = itau
2462 itauq = ie + m
2463 itaup = itauq + m
2464 iwork = itaup + m
2465*
2466* Bidiagonalize L in WORK(IR)
2467* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2468*
2469 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2470 $ work( ie ), work( itauq ),
2471 $ work( itaup ), work( iwork ),
2472 $ lwork-iwork+1, ierr )
2473*
2474* Generate right vectors bidiagonalizing L in
2475* WORK(IR)
2476* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2477*
2478 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2479 $ work( itaup ), work( iwork ),
2480 $ lwork-iwork+1, ierr )
2481 iwork = ie + m
2482*
2483* Perform bidiagonal QR iteration, computing right
2484* singular vectors of L in WORK(IR)
2485* (Workspace: need M*M + BDSPAC)
2486*
2487 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2488 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2489 $ work( iwork ), info )
2490*
2491* Multiply right singular vectors of L in WORK(IR) by
2492* Q in A, storing result in VT
2493* (Workspace: need M*M)
2494*
2495 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2496 $ ldwrkr, a, lda, zero, vt, ldvt )
2497*
2498 ELSE
2499*
2500* Insufficient workspace for a fast algorithm
2501*
2502 itau = 1
2503 iwork = itau + m
2504*
2505* Compute A=L*Q
2506* (Workspace: need 2*M, prefer M + M*NB)
2507*
2508 CALL dgelqf( m, n, a, lda, work( itau ),
2509 $ work( iwork ), lwork-iwork+1, ierr )
2510*
2511* Copy result to VT
2512*
2513 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2514*
2515* Generate Q in VT
2516* (Workspace: need 2*M, prefer M + M*NB)
2517*
2518 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2519 $ work( iwork ), lwork-iwork+1, ierr )
2520 ie = itau
2521 itauq = ie + m
2522 itaup = itauq + m
2523 iwork = itaup + m
2524*
2525* Zero out above L in A
2526*
2527 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2528 $ lda )
2529*
2530* Bidiagonalize L in A
2531* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2532*
2533 CALL dgebrd( m, m, a, lda, s, work( ie ),
2534 $ work( itauq ), work( itaup ),
2535 $ work( iwork ), lwork-iwork+1, ierr )
2536*
2537* Multiply right vectors bidiagonalizing L by Q in VT
2538* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2539*
2540 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2541 $ work( itaup ), vt, ldvt,
2542 $ work( iwork ), lwork-iwork+1, ierr )
2543 iwork = ie + m
2544*
2545* Perform bidiagonal QR iteration, computing right
2546* singular vectors of A in VT
2547* (Workspace: need BDSPAC)
2548*
2549 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2550 $ ldvt, dum, 1, dum, 1, work( iwork ),
2551 $ info )
2552*
2553 END IF
2554*
2555 ELSE IF( wntuo ) THEN
2556*
2557* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2558* M right singular vectors to be computed in VT and
2559* M left singular vectors to be overwritten on A
2560*
2561 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2562*
2563* Sufficient workspace for a fast algorithm
2564*
2565 iu = 1
2566 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2567*
2568* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2569*
2570 ldwrku = lda
2571 ir = iu + ldwrku*m
2572 ldwrkr = lda
2573 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
2574*
2575* WORK(IU) is LDA by M and WORK(IR) is M by M
2576*
2577 ldwrku = lda
2578 ir = iu + ldwrku*m
2579 ldwrkr = m
2580 ELSE
2581*
2582* WORK(IU) is M by M and WORK(IR) is M by M
2583*
2584 ldwrku = m
2585 ir = iu + ldwrku*m
2586 ldwrkr = m
2587 END IF
2588 itau = ir + ldwrkr*m
2589 iwork = itau + m
2590*
2591* Compute A=L*Q
2592* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2593*
2594 CALL dgelqf( m, n, a, lda, work( itau ),
2595 $ work( iwork ), lwork-iwork+1, ierr )
2596*
2597* Copy L to WORK(IU), zeroing out below it
2598*
2599 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2600 $ ldwrku )
2601 CALL dlaset( 'U', m-1, m-1, zero, zero,
2602 $ work( iu+ldwrku ), ldwrku )
2603*
2604* Generate Q in A
2605* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2606*
2607 CALL dorglq( m, n, m, a, lda, work( itau ),
2608 $ work( iwork ), lwork-iwork+1, ierr )
2609 ie = itau
2610 itauq = ie + m
2611 itaup = itauq + m
2612 iwork = itaup + m
2613*
2614* Bidiagonalize L in WORK(IU), copying result to
2615* WORK(IR)
2616* (Workspace: need 2*M*M + 4*M,
2617* prefer 2*M*M+3*M+2*M*NB)
2618*
2619 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2620 $ work( ie ), work( itauq ),
2621 $ work( itaup ), work( iwork ),
2622 $ lwork-iwork+1, ierr )
2623 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2624 $ work( ir ), ldwrkr )
2625*
2626* Generate right bidiagonalizing vectors in WORK(IU)
2627* (Workspace: need 2*M*M + 4*M-1,
2628* prefer 2*M*M+3*M+(M-1)*NB)
2629*
2630 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2631 $ work( itaup ), work( iwork ),
2632 $ lwork-iwork+1, ierr )
2633*
2634* Generate left bidiagonalizing vectors in WORK(IR)
2635* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2636*
2637 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2638 $ work( itauq ), work( iwork ),
2639 $ lwork-iwork+1, ierr )
2640 iwork = ie + m
2641*
2642* Perform bidiagonal QR iteration, computing left
2643* singular vectors of L in WORK(IR) and computing
2644* right singular vectors of L in WORK(IU)
2645* (Workspace: need 2*M*M + BDSPAC)
2646*
2647 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2648 $ work( iu ), ldwrku, work( ir ),
2649 $ ldwrkr, dum, 1, work( iwork ), info )
2650*
2651* Multiply right singular vectors of L in WORK(IU) by
2652* Q in A, storing result in VT
2653* (Workspace: need M*M)
2654*
2655 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2656 $ ldwrku, a, lda, zero, vt, ldvt )
2657*
2658* Copy left singular vectors of L to A
2659* (Workspace: need M*M)
2660*
2661 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2662 $ lda )
2663*
2664 ELSE
2665*
2666* Insufficient workspace for a fast algorithm
2667*
2668 itau = 1
2669 iwork = itau + m
2670*
2671* Compute A=L*Q, copying result to VT
2672* (Workspace: need 2*M, prefer M + M*NB)
2673*
2674 CALL dgelqf( m, n, a, lda, work( itau ),
2675 $ work( iwork ), lwork-iwork+1, ierr )
2676 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2677*
2678* Generate Q in VT
2679* (Workspace: need 2*M, prefer M + M*NB)
2680*
2681 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2682 $ work( iwork ), lwork-iwork+1, ierr )
2683 ie = itau
2684 itauq = ie + m
2685 itaup = itauq + m
2686 iwork = itaup + m
2687*
2688* Zero out above L in A
2689*
2690 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2691 $ lda )
2692*
2693* Bidiagonalize L in A
2694* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2695*
2696 CALL dgebrd( m, m, a, lda, s, work( ie ),
2697 $ work( itauq ), work( itaup ),
2698 $ work( iwork ), lwork-iwork+1, ierr )
2699*
2700* Multiply right vectors bidiagonalizing L by Q in VT
2701* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2702*
2703 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2704 $ work( itaup ), vt, ldvt,
2705 $ work( iwork ), lwork-iwork+1, ierr )
2706*
2707* Generate left bidiagonalizing vectors of L in A
2708* (Workspace: need 4*M, prefer 3*M + M*NB)
2709*
2710 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2711 $ work( iwork ), lwork-iwork+1, ierr )
2712 iwork = ie + m
2713*
2714* Perform bidiagonal QR iteration, compute left
2715* singular vectors of A in A and compute right
2716* singular vectors of A in VT
2717* (Workspace: need BDSPAC)
2718*
2719 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2720 $ ldvt, a, lda, dum, 1, work( iwork ),
2721 $ info )
2722*
2723 END IF
2724*
2725 ELSE IF( wntuas ) THEN
2726*
2727* Path 6t(N much larger than M, JOBU='S' or 'A',
2728* JOBVT='S')
2729* M right singular vectors to be computed in VT and
2730* M left singular vectors to be computed in U
2731*
2732 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2733*
2734* Sufficient workspace for a fast algorithm
2735*
2736 iu = 1
2737 IF( lwork.GE.wrkbl+lda*m ) THEN
2738*
2739* WORK(IU) is LDA by N
2740*
2741 ldwrku = lda
2742 ELSE
2743*
2744* WORK(IU) is LDA by M
2745*
2746 ldwrku = m
2747 END IF
2748 itau = iu + ldwrku*m
2749 iwork = itau + m
2750*
2751* Compute A=L*Q
2752* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2753*
2754 CALL dgelqf( m, n, a, lda, work( itau ),
2755 $ work( iwork ), lwork-iwork+1, ierr )
2756*
2757* Copy L to WORK(IU), zeroing out above it
2758*
2759 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2760 $ ldwrku )
2761 CALL dlaset( 'U', m-1, m-1, zero, zero,
2762 $ work( iu+ldwrku ), ldwrku )
2763*
2764* Generate Q in A
2765* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2766*
2767 CALL dorglq( m, n, m, a, lda, work( itau ),
2768 $ work( iwork ), lwork-iwork+1, ierr )
2769 ie = itau
2770 itauq = ie + m
2771 itaup = itauq + m
2772 iwork = itaup + m
2773*
2774* Bidiagonalize L in WORK(IU), copying result to U
2775* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2776*
2777 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2778 $ work( ie ), work( itauq ),
2779 $ work( itaup ), work( iwork ),
2780 $ lwork-iwork+1, ierr )
2781 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2782 $ ldu )
2783*
2784* Generate right bidiagonalizing vectors in WORK(IU)
2785* (Workspace: need M*M + 4*M-1,
2786* prefer M*M+3*M+(M-1)*NB)
2787*
2788 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2789 $ work( itaup ), work( iwork ),
2790 $ lwork-iwork+1, ierr )
2791*
2792* Generate left bidiagonalizing vectors in U
2793* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2794*
2795 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2796 $ work( iwork ), lwork-iwork+1, ierr )
2797 iwork = ie + m
2798*
2799* Perform bidiagonal QR iteration, computing left
2800* singular vectors of L in U and computing right
2801* singular vectors of L in WORK(IU)
2802* (Workspace: need M*M + BDSPAC)
2803*
2804 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2805 $ work( iu ), ldwrku, u, ldu, dum, 1,
2806 $ work( iwork ), info )
2807*
2808* Multiply right singular vectors of L in WORK(IU) by
2809* Q in A, storing result in VT
2810* (Workspace: need M*M)
2811*
2812 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2813 $ ldwrku, a, lda, zero, vt, ldvt )
2814*
2815 ELSE
2816*
2817* Insufficient workspace for a fast algorithm
2818*
2819 itau = 1
2820 iwork = itau + m
2821*
2822* Compute A=L*Q, copying result to VT
2823* (Workspace: need 2*M, prefer M + M*NB)
2824*
2825 CALL dgelqf( m, n, a, lda, work( itau ),
2826 $ work( iwork ), lwork-iwork+1, ierr )
2827 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2828*
2829* Generate Q in VT
2830* (Workspace: need 2*M, prefer M + M*NB)
2831*
2832 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2833 $ work( iwork ), lwork-iwork+1, ierr )
2834*
2835* Copy L to U, zeroing out above it
2836*
2837 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2838 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2839 $ ldu )
2840 ie = itau
2841 itauq = ie + m
2842 itaup = itauq + m
2843 iwork = itaup + m
2844*
2845* Bidiagonalize L in U
2846* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2847*
2848 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2849 $ work( itauq ), work( itaup ),
2850 $ work( iwork ), lwork-iwork+1, ierr )
2851*
2852* Multiply right bidiagonalizing vectors in U by Q
2853* in VT
2854* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2855*
2856 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2857 $ work( itaup ), vt, ldvt,
2858 $ work( iwork ), lwork-iwork+1, ierr )
2859*
2860* Generate left bidiagonalizing vectors in U
2861* (Workspace: need 4*M, prefer 3*M + M*NB)
2862*
2863 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2864 $ work( iwork ), lwork-iwork+1, ierr )
2865 iwork = ie + m
2866*
2867* Perform bidiagonal QR iteration, computing left
2868* singular vectors of A in U and computing right
2869* singular vectors of A in VT
2870* (Workspace: need BDSPAC)
2871*
2872 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2873 $ ldvt, u, ldu, dum, 1, work( iwork ),
2874 $ info )
2875*
2876 END IF
2877*
2878 END IF
2879*
2880 ELSE IF( wntva ) THEN
2881*
2882 IF( wntun ) THEN
2883*
2884* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2885* N right singular vectors to be computed in VT and
2886* no left singular vectors to be computed
2887*
2888 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
2889*
2890* Sufficient workspace for a fast algorithm
2891*
2892 ir = 1
2893 IF( lwork.GE.wrkbl+lda*m ) THEN
2894*
2895* WORK(IR) is LDA by M
2896*
2897 ldwrkr = lda
2898 ELSE
2899*
2900* WORK(IR) is M by M
2901*
2902 ldwrkr = m
2903 END IF
2904 itau = ir + ldwrkr*m
2905 iwork = itau + m
2906*
2907* Compute A=L*Q, copying result to VT
2908* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2909*
2910 CALL dgelqf( m, n, a, lda, work( itau ),
2911 $ work( iwork ), lwork-iwork+1, ierr )
2912 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2913*
2914* Copy L to WORK(IR), zeroing out above it
2915*
2916 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2917 $ ldwrkr )
2918 CALL dlaset( 'U', m-1, m-1, zero, zero,
2919 $ work( ir+ldwrkr ), ldwrkr )
2920*
2921* Generate Q in VT
2922* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2923*
2924 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2925 $ work( iwork ), lwork-iwork+1, ierr )
2926 ie = itau
2927 itauq = ie + m
2928 itaup = itauq + m
2929 iwork = itaup + m
2930*
2931* Bidiagonalize L in WORK(IR)
2932* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2933*
2934 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2935 $ work( ie ), work( itauq ),
2936 $ work( itaup ), work( iwork ),
2937 $ lwork-iwork+1, ierr )
2938*
2939* Generate right bidiagonalizing vectors in WORK(IR)
2940* (Workspace: need M*M + 4*M-1,
2941* prefer M*M+3*M+(M-1)*NB)
2942*
2943 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2944 $ work( itaup ), work( iwork ),
2945 $ lwork-iwork+1, ierr )
2946 iwork = ie + m
2947*
2948* Perform bidiagonal QR iteration, computing right
2949* singular vectors of L in WORK(IR)
2950* (Workspace: need M*M + BDSPAC)
2951*
2952 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2953 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2954 $ work( iwork ), info )
2955*
2956* Multiply right singular vectors of L in WORK(IR) by
2957* Q in VT, storing result in A
2958* (Workspace: need M*M)
2959*
2960 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2961 $ ldwrkr, vt, ldvt, zero, a, lda )
2962*
2963* Copy right singular vectors of A from A to VT
2964*
2965 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
2966*
2967 ELSE
2968*
2969* Insufficient workspace for a fast algorithm
2970*
2971 itau = 1
2972 iwork = itau + m
2973*
2974* Compute A=L*Q, copying result to VT
2975* (Workspace: need 2*M, prefer M + M*NB)
2976*
2977 CALL dgelqf( m, n, a, lda, work( itau ),
2978 $ work( iwork ), lwork-iwork+1, ierr )
2979 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2980*
2981* Generate Q in VT
2982* (Workspace: need M + N, prefer M + N*NB)
2983*
2984 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2985 $ work( iwork ), lwork-iwork+1, ierr )
2986 ie = itau
2987 itauq = ie + m
2988 itaup = itauq + m
2989 iwork = itaup + m
2990*
2991* Zero out above L in A
2992*
2993 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2994 $ lda )
2995*
2996* Bidiagonalize L in A
2997* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2998*
2999 CALL dgebrd( m, m, a, lda, s, work( ie ),
3000 $ work( itauq ), work( itaup ),
3001 $ work( iwork ), lwork-iwork+1, ierr )
3002*
3003* Multiply right bidiagonalizing vectors in A by Q
3004* in VT
3005* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3006*
3007 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3008 $ work( itaup ), vt, ldvt,
3009 $ work( iwork ), lwork-iwork+1, ierr )
3010 iwork = ie + m
3011*
3012* Perform bidiagonal QR iteration, computing right
3013* singular vectors of A in VT
3014* (Workspace: need BDSPAC)
3015*
3016 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3017 $ ldvt, dum, 1, dum, 1, work( iwork ),
3018 $ info )
3019*
3020 END IF
3021*
3022 ELSE IF( wntuo ) THEN
3023*
3024* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3025* N right singular vectors to be computed in VT and
3026* M left singular vectors to be overwritten on A
3027*
3028 IF( lwork.GE.2*m*m+max( n + m, 4*m, bdspac ) ) THEN
3029*
3030* Sufficient workspace for a fast algorithm
3031*
3032 iu = 1
3033 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3034*
3035* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3036*
3037 ldwrku = lda
3038 ir = iu + ldwrku*m
3039 ldwrkr = lda
3040 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
3041*
3042* WORK(IU) is LDA by M and WORK(IR) is M by M
3043*
3044 ldwrku = lda
3045 ir = iu + ldwrku*m
3046 ldwrkr = m
3047 ELSE
3048*
3049* WORK(IU) is M by M and WORK(IR) is M by M
3050*
3051 ldwrku = m
3052 ir = iu + ldwrku*m
3053 ldwrkr = m
3054 END IF
3055 itau = ir + ldwrkr*m
3056 iwork = itau + m
3057*
3058* Compute A=L*Q, copying result to VT
3059* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3060*
3061 CALL dgelqf( m, n, a, lda, work( itau ),
3062 $ work( iwork ), lwork-iwork+1, ierr )
3063 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3064*
3065* Generate Q in VT
3066* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3067*
3068 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3069 $ work( iwork ), lwork-iwork+1, ierr )
3070*
3071* Copy L to WORK(IU), zeroing out above it
3072*
3073 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3074 $ ldwrku )
3075 CALL dlaset( 'U', m-1, m-1, zero, zero,
3076 $ work( iu+ldwrku ), ldwrku )
3077 ie = itau
3078 itauq = ie + m
3079 itaup = itauq + m
3080 iwork = itaup + m
3081*
3082* Bidiagonalize L in WORK(IU), copying result to
3083* WORK(IR)
3084* (Workspace: need 2*M*M + 4*M,
3085* prefer 2*M*M+3*M+2*M*NB)
3086*
3087 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3088 $ work( ie ), work( itauq ),
3089 $ work( itaup ), work( iwork ),
3090 $ lwork-iwork+1, ierr )
3091 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3092 $ work( ir ), ldwrkr )
3093*
3094* Generate right bidiagonalizing vectors in WORK(IU)
3095* (Workspace: need 2*M*M + 4*M-1,
3096* prefer 2*M*M+3*M+(M-1)*NB)
3097*
3098 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3099 $ work( itaup ), work( iwork ),
3100 $ lwork-iwork+1, ierr )
3101*
3102* Generate left bidiagonalizing vectors in WORK(IR)
3103* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3104*
3105 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3106 $ work( itauq ), work( iwork ),
3107 $ lwork-iwork+1, ierr )
3108 iwork = ie + m
3109*
3110* Perform bidiagonal QR iteration, computing left
3111* singular vectors of L in WORK(IR) and computing
3112* right singular vectors of L in WORK(IU)
3113* (Workspace: need 2*M*M + BDSPAC)
3114*
3115 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3116 $ work( iu ), ldwrku, work( ir ),
3117 $ ldwrkr, dum, 1, work( iwork ), info )
3118*
3119* Multiply right singular vectors of L in WORK(IU) by
3120* Q in VT, storing result in A
3121* (Workspace: need M*M)
3122*
3123 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3124 $ ldwrku, vt, ldvt, zero, a, lda )
3125*
3126* Copy right singular vectors of A from A to VT
3127*
3128 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3129*
3130* Copy left singular vectors of A from WORK(IR) to A
3131*
3132 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3133 $ lda )
3134*
3135 ELSE
3136*
3137* Insufficient workspace for a fast algorithm
3138*
3139 itau = 1
3140 iwork = itau + m
3141*
3142* Compute A=L*Q, copying result to VT
3143* (Workspace: need 2*M, prefer M + M*NB)
3144*
3145 CALL dgelqf( m, n, a, lda, work( itau ),
3146 $ work( iwork ), lwork-iwork+1, ierr )
3147 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3148*
3149* Generate Q in VT
3150* (Workspace: need M + N, prefer M + N*NB)
3151*
3152 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3153 $ work( iwork ), lwork-iwork+1, ierr )
3154 ie = itau
3155 itauq = ie + m
3156 itaup = itauq + m
3157 iwork = itaup + m
3158*
3159* Zero out above L in A
3160*
3161 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3162 $ lda )
3163*
3164* Bidiagonalize L in A
3165* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3166*
3167 CALL dgebrd( m, m, a, lda, s, work( ie ),
3168 $ work( itauq ), work( itaup ),
3169 $ work( iwork ), lwork-iwork+1, ierr )
3170*
3171* Multiply right bidiagonalizing vectors in A by Q
3172* in VT
3173* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3174*
3175 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3176 $ work( itaup ), vt, ldvt,
3177 $ work( iwork ), lwork-iwork+1, ierr )
3178*
3179* Generate left bidiagonalizing vectors in A
3180* (Workspace: need 4*M, prefer 3*M + M*NB)
3181*
3182 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3183 $ work( iwork ), lwork-iwork+1, ierr )
3184 iwork = ie + m
3185*
3186* Perform bidiagonal QR iteration, computing left
3187* singular vectors of A in A and computing right
3188* singular vectors of A in VT
3189* (Workspace: need BDSPAC)
3190*
3191 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3192 $ ldvt, a, lda, dum, 1, work( iwork ),
3193 $ info )
3194*
3195 END IF
3196*
3197 ELSE IF( wntuas ) THEN
3198*
3199* Path 9t(N much larger than M, JOBU='S' or 'A',
3200* JOBVT='A')
3201* N right singular vectors to be computed in VT and
3202* M left singular vectors to be computed in U
3203*
3204 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
3205*
3206* Sufficient workspace for a fast algorithm
3207*
3208 iu = 1
3209 IF( lwork.GE.wrkbl+lda*m ) THEN
3210*
3211* WORK(IU) is LDA by M
3212*
3213 ldwrku = lda
3214 ELSE
3215*
3216* WORK(IU) is M by M
3217*
3218 ldwrku = m
3219 END IF
3220 itau = iu + ldwrku*m
3221 iwork = itau + m
3222*
3223* Compute A=L*Q, copying result to VT
3224* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3225*
3226 CALL dgelqf( m, n, a, lda, work( itau ),
3227 $ work( iwork ), lwork-iwork+1, ierr )
3228 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3229*
3230* Generate Q in VT
3231* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3232*
3233 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3234 $ work( iwork ), lwork-iwork+1, ierr )
3235*
3236* Copy L to WORK(IU), zeroing out above it
3237*
3238 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3239 $ ldwrku )
3240 CALL dlaset( 'U', m-1, m-1, zero, zero,
3241 $ work( iu+ldwrku ), ldwrku )
3242 ie = itau
3243 itauq = ie + m
3244 itaup = itauq + m
3245 iwork = itaup + m
3246*
3247* Bidiagonalize L in WORK(IU), copying result to U
3248* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3249*
3250 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3251 $ work( ie ), work( itauq ),
3252 $ work( itaup ), work( iwork ),
3253 $ lwork-iwork+1, ierr )
3254 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3255 $ ldu )
3256*
3257* Generate right bidiagonalizing vectors in WORK(IU)
3258* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3259*
3260 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3261 $ work( itaup ), work( iwork ),
3262 $ lwork-iwork+1, ierr )
3263*
3264* Generate left bidiagonalizing vectors in U
3265* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3266*
3267 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3268 $ work( iwork ), lwork-iwork+1, ierr )
3269 iwork = ie + m
3270*
3271* Perform bidiagonal QR iteration, computing left
3272* singular vectors of L in U and computing right
3273* singular vectors of L in WORK(IU)
3274* (Workspace: need M*M + BDSPAC)
3275*
3276 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3277 $ work( iu ), ldwrku, u, ldu, dum, 1,
3278 $ work( iwork ), info )
3279*
3280* Multiply right singular vectors of L in WORK(IU) by
3281* Q in VT, storing result in A
3282* (Workspace: need M*M)
3283*
3284 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3285 $ ldwrku, vt, ldvt, zero, a, lda )
3286*
3287* Copy right singular vectors of A from A to VT
3288*
3289 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3290*
3291 ELSE
3292*
3293* Insufficient workspace for a fast algorithm
3294*
3295 itau = 1
3296 iwork = itau + m
3297*
3298* Compute A=L*Q, copying result to VT
3299* (Workspace: need 2*M, prefer M + M*NB)
3300*
3301 CALL dgelqf( m, n, a, lda, work( itau ),
3302 $ work( iwork ), lwork-iwork+1, ierr )
3303 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3304*
3305* Generate Q in VT
3306* (Workspace: need M + N, prefer M + N*NB)
3307*
3308 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3309 $ work( iwork ), lwork-iwork+1, ierr )
3310*
3311* Copy L to U, zeroing out above it
3312*
3313 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3314 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3315 $ ldu )
3316 ie = itau
3317 itauq = ie + m
3318 itaup = itauq + m
3319 iwork = itaup + m
3320*
3321* Bidiagonalize L in U
3322* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3323*
3324 CALL dgebrd( m, m, u, ldu, s, work( ie ),
3325 $ work( itauq ), work( itaup ),
3326 $ work( iwork ), lwork-iwork+1, ierr )
3327*
3328* Multiply right bidiagonalizing vectors in U by Q
3329* in VT
3330* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3331*
3332 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3333 $ work( itaup ), vt, ldvt,
3334 $ work( iwork ), lwork-iwork+1, ierr )
3335*
3336* Generate left bidiagonalizing vectors in U
3337* (Workspace: need 4*M, prefer 3*M + M*NB)
3338*
3339 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3340 $ work( iwork ), lwork-iwork+1, ierr )
3341 iwork = ie + m
3342*
3343* Perform bidiagonal QR iteration, computing left
3344* singular vectors of A in U and computing right
3345* singular vectors of A in VT
3346* (Workspace: need BDSPAC)
3347*
3348 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3349 $ ldvt, u, ldu, dum, 1, work( iwork ),
3350 $ info )
3351*
3352 END IF
3353*
3354 END IF
3355*
3356 END IF
3357*
3358 ELSE
3359*
3360* N .LT. MNTHR
3361*
3362* Path 10t(N greater than M, but not much larger)
3363* Reduce to bidiagonal form without LQ decomposition
3364*
3365 ie = 1
3366 itauq = ie + m
3367 itaup = itauq + m
3368 iwork = itaup + m
3369*
3370* Bidiagonalize A
3371* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3372*
3373 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3374 $ work( itaup ), work( iwork ), lwork-iwork+1,
3375 $ ierr )
3376 IF( wntuas ) THEN
3377*
3378* If left singular vectors desired in U, copy result to U
3379* and generate left bidiagonalizing vectors in U
3380* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3381*
3382 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3383 CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3384 $ work( iwork ), lwork-iwork+1, ierr )
3385 END IF
3386 IF( wntvas ) THEN
3387*
3388* If right singular vectors desired in VT, copy result to
3389* VT and generate right bidiagonalizing vectors in VT
3390* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3391*
3392 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3393 IF( wntva )
3394 $ nrvt = n
3395 IF( wntvs )
3396 $ nrvt = m
3397 CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3398 $ work( iwork ), lwork-iwork+1, ierr )
3399 END IF
3400 IF( wntuo ) THEN
3401*
3402* If left singular vectors desired in A, generate left
3403* bidiagonalizing vectors in A
3404* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3405*
3406 CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3407 $ work( iwork ), lwork-iwork+1, ierr )
3408 END IF
3409 IF( wntvo ) THEN
3410*
3411* If right singular vectors desired in A, generate right
3412* bidiagonalizing vectors in A
3413* (Workspace: need 4*M, prefer 3*M + M*NB)
3414*
3415 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3416 $ work( iwork ), lwork-iwork+1, ierr )
3417 END IF
3418 iwork = ie + m
3419 IF( wntuas .OR. wntuo )
3420 $ nru = m
3421 IF( wntun )
3422 $ nru = 0
3423 IF( wntvas .OR. wntvo )
3424 $ ncvt = n
3425 IF( wntvn )
3426 $ ncvt = 0
3427 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3428*
3429* Perform bidiagonal QR iteration, if desired, computing
3430* left singular vectors in U and computing right singular
3431* vectors in VT
3432* (Workspace: need BDSPAC)
3433*
3434 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3435 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3436 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3437*
3438* Perform bidiagonal QR iteration, if desired, computing
3439* left singular vectors in U and computing right singular
3440* vectors in A
3441* (Workspace: need BDSPAC)
3442*
3443 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3444 $ u, ldu, dum, 1, work( iwork ), info )
3445 ELSE
3446*
3447* Perform bidiagonal QR iteration, if desired, computing
3448* left singular vectors in A and computing right singular
3449* vectors in VT
3450* (Workspace: need BDSPAC)
3451*
3452 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3453 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3454 END IF
3455*
3456 END IF
3457*
3458 END IF
3459*
3460* If DBDSQR failed to converge, copy unconverged superdiagonals
3461* to WORK( 2:MINMN )
3462*
3463 IF( info.NE.0 ) THEN
3464 IF( ie.GT.2 ) THEN
3465 DO 50 i = 1, minmn - 1
3466 work( i+1 ) = work( i+ie-1 )
3467 50 CONTINUE
3468 END IF
3469 IF( ie.LT.2 ) THEN
3470 DO 60 i = minmn - 1, 1, -1
3471 work( i+1 ) = work( i+ie-1 )
3472 60 CONTINUE
3473 END IF
3474 END IF
3475*
3476* Undo scaling if necessary
3477*
3478 IF( iscl.EQ.1 ) THEN
3479 IF( anrm.GT.bignum )
3480 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3481 $ ierr )
3482 IF( info.NE.0 .AND. anrm.GT.bignum )
3483 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3484 $ minmn, ierr )
3485 IF( anrm.LT.smlnum )
3486 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3487 $ ierr )
3488 IF( info.NE.0 .AND. anrm.LT.smlnum )
3489 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3490 $ minmn, ierr )
3491 END IF
3492*
3493* Return optimal workspace in WORK(1)
3494*
3495 work( 1 ) = maxwrk
3496*
3497 RETURN
3498*
3499* End of DGESVD
3500*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
DBDSQR
Definition dbdsqr.f:241
subroutine dgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
DGEBRD
Definition dgebrd.f:205
subroutine dgelqf(m, n, a, lda, tau, work, lwork, info)
DGELQF
Definition dgelqf.f:143
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine dgeqrf(m, n, a, lda, tau, work, lwork, info)
DGEQRF
Definition dgeqrf.f:146
integer function ilaenv(ispec, name, opts, n1, n2, n3, n4)
ILAENV
Definition ilaenv.f:162
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:103
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
double precision function dlange(norm, m, n, a, lda, work)
DLANGE returns the value of the 1-norm, Frobenius norm, infinity-norm, or the largest absolute value ...
Definition dlange.f:114
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:143
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:110
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
subroutine dorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
DORGBR
Definition dorgbr.f:157
subroutine dorglq(m, n, k, a, lda, tau, work, lwork, info)
DORGLQ
Definition dorglq.f:127
subroutine dorgqr(m, n, k, a, lda, tau, work, lwork, info)
DORGQR
Definition dorgqr.f:128
subroutine dormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMBR
Definition dormbr.f:195
Here is the call graph for this function:
Here is the caller graph for this function: