LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dgesvd.f
Go to the documentation of this file.
1*> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download DGESVD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22* WORK, LWORK, INFO )
23*
24* .. Scalar Arguments ..
25* CHARACTER JOBU, JOBVT
26* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27* ..
28* .. Array Arguments ..
29* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
30* $ VT( LDVT, * ), WORK( * )
31* ..
32*
33*
34*> \par Purpose:
35* =============
36*>
37*> \verbatim
38*>
39*> DGESVD computes the singular value decomposition (SVD) of a real
40*> M-by-N matrix A, optionally computing the left and/or right singular
41*> vectors. The SVD is written
42*>
43*> A = U * SIGMA * transpose(V)
44*>
45*> where SIGMA is an M-by-N matrix which is zero except for its
46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48*> are the singular values of A; they are real and non-negative, and
49*> are returned in descending order. The first min(m,n) columns of
50*> U and V are the left and right singular vectors of A.
51*>
52*> Note that the routine returns V**T, not V.
53*> \endverbatim
54*
55* Arguments:
56* ==========
57*
58*> \param[in] JOBU
59*> \verbatim
60*> JOBU is CHARACTER*1
61*> Specifies options for computing all or part of the matrix U:
62*> = 'A': all M columns of U are returned in array U:
63*> = 'S': the first min(m,n) columns of U (the left singular
64*> vectors) are returned in the array U;
65*> = 'O': the first min(m,n) columns of U (the left singular
66*> vectors) are overwritten on the array A;
67*> = 'N': no columns of U (no left singular vectors) are
68*> computed.
69*> \endverbatim
70*>
71*> \param[in] JOBVT
72*> \verbatim
73*> JOBVT is CHARACTER*1
74*> Specifies options for computing all or part of the matrix
75*> V**T:
76*> = 'A': all N rows of V**T are returned in the array VT;
77*> = 'S': the first min(m,n) rows of V**T (the right singular
78*> vectors) are returned in the array VT;
79*> = 'O': the first min(m,n) rows of V**T (the right singular
80*> vectors) are overwritten on the array A;
81*> = 'N': no rows of V**T (no right singular vectors) are
82*> computed.
83*>
84*> JOBVT and JOBU cannot both be 'O'.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*> M is INTEGER
90*> The number of rows of the input matrix A. M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*> N is INTEGER
96*> The number of columns of the input matrix A. N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*> A is DOUBLE PRECISION array, dimension (LDA,N)
102*> On entry, the M-by-N matrix A.
103*> On exit,
104*> if JOBU = 'O', A is overwritten with the first min(m,n)
105*> columns of U (the left singular vectors,
106*> stored columnwise);
107*> if JOBVT = 'O', A is overwritten with the first min(m,n)
108*> rows of V**T (the right singular vectors,
109*> stored rowwise);
110*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111*> are destroyed.
112*> \endverbatim
113*>
114*> \param[in] LDA
115*> \verbatim
116*> LDA is INTEGER
117*> The leading dimension of the array A. LDA >= max(1,M).
118*> \endverbatim
119*>
120*> \param[out] S
121*> \verbatim
122*> S is DOUBLE PRECISION array, dimension (min(M,N))
123*> The singular values of A, sorted so that S(i) >= S(i+1).
124*> \endverbatim
125*>
126*> \param[out] U
127*> \verbatim
128*> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131*> if JOBU = 'S', U contains the first min(m,n) columns of U
132*> (the left singular vectors, stored columnwise);
133*> if JOBU = 'N' or 'O', U is not referenced.
134*> \endverbatim
135*>
136*> \param[in] LDU
137*> \verbatim
138*> LDU is INTEGER
139*> The leading dimension of the array U. LDU >= 1; if
140*> JOBU = 'S' or 'A', LDU >= M.
141*> \endverbatim
142*>
143*> \param[out] VT
144*> \verbatim
145*> VT is DOUBLE PRECISION array, dimension (LDVT,N)
146*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147*> V**T;
148*> if JOBVT = 'S', VT contains the first min(m,n) rows of
149*> V**T (the right singular vectors, stored rowwise);
150*> if JOBVT = 'N' or 'O', VT is not referenced.
151*> \endverbatim
152*>
153*> \param[in] LDVT
154*> \verbatim
155*> LDVT is INTEGER
156*> The leading dimension of the array VT. LDVT >= 1; if
157*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158*> \endverbatim
159*>
160*> \param[out] WORK
161*> \verbatim
162*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165*> superdiagonal elements of an upper bidiagonal matrix B
166*> whose diagonal is in S (not necessarily sorted). B
167*> satisfies A = U * B * VT, so it has the same singular values
168*> as A, and singular vectors related by U and VT.
169*> \endverbatim
170*>
171*> \param[in] LWORK
172*> \verbatim
173*> LWORK is INTEGER
174*> The dimension of the array WORK.
175*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176*> - PATH 1 (M much larger than N, JOBU='N')
177*> - PATH 1t (N much larger than M, JOBVT='N')
178*> LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
179*> For good performance, LWORK should generally be larger.
180*>
181*> If LWORK = -1, then a workspace query is assumed; the routine
182*> only calculates the optimal size of the WORK array, returns
183*> this value as the first entry of the WORK array, and no error
184*> message related to LWORK is issued by XERBLA.
185*> \endverbatim
186*>
187*> \param[out] INFO
188*> \verbatim
189*> INFO is INTEGER
190*> = 0: successful exit.
191*> < 0: if INFO = -i, the i-th argument had an illegal value.
192*> > 0: if DBDSQR did not converge, INFO specifies how many
193*> superdiagonals of an intermediate bidiagonal form B
194*> did not converge to zero. See the description of WORK
195*> above for details.
196*> \endverbatim
197*
198* Authors:
199* ========
200*
201*> \author Univ. of Tennessee
202*> \author Univ. of California Berkeley
203*> \author Univ. of Colorado Denver
204*> \author NAG Ltd.
205*
206*> \ingroup doubleGEsing
207*
208* =====================================================================
209 SUBROUTINE dgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
210 $ VT, LDVT, WORK, LWORK, INFO )
211*
212* -- LAPACK driver routine --
213* -- LAPACK is a software package provided by Univ. of Tennessee, --
214* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216* .. Scalar Arguments ..
217 CHARACTER JOBU, JOBVT
218 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219* ..
220* .. Array Arguments ..
221 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
222 $ vt( ldvt, * ), work( * )
223* ..
224*
225* =====================================================================
226*
227* .. Parameters ..
228 DOUBLE PRECISION ZERO, ONE
229 parameter( zero = 0.0d0, one = 1.0d0 )
230* ..
231* .. Local Scalars ..
232 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233 $ wntva, wntvas, wntvn, wntvo, wntvs
234 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235 $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
236 $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
237 $ nrvt, wrkbl
238 INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
239 $ lwork_dgebrd, lwork_dorgbr_p, lwork_dorgbr_q,
240 $ lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m
241 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
242* ..
243* .. Local Arrays ..
244 DOUBLE PRECISION DUM( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 DOUBLE PRECISION DLAMCH, DLANGE
255 EXTERNAL lsame, ilaenv, dlamch, dlange
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC max, min, sqrt
259* ..
260* .. Executable Statements ..
261*
262* Test the input arguments
263*
264 info = 0
265 minmn = min( m, n )
266 wntua = lsame( jobu, 'A' )
267 wntus = lsame( jobu, 'S' )
268 wntuas = wntua .OR. wntus
269 wntuo = lsame( jobu, 'O' )
270 wntun = lsame( jobu, 'N' )
271 wntva = lsame( jobvt, 'A' )
272 wntvs = lsame( jobvt, 'S' )
273 wntvas = wntva .OR. wntvs
274 wntvo = lsame( jobvt, 'O' )
275 wntvn = lsame( jobvt, 'N' )
276 lquery = ( lwork.EQ.-1 )
277*
278 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279 info = -1
280 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281 $ ( wntvo .AND. wntuo ) ) THEN
282 info = -2
283 ELSE IF( m.LT.0 ) THEN
284 info = -3
285 ELSE IF( n.LT.0 ) THEN
286 info = -4
287 ELSE IF( lda.LT.max( 1, m ) ) THEN
288 info = -6
289 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290 info = -9
291 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293 info = -11
294 END IF
295*
296* Compute workspace
297* (Note: Comments in the code beginning "Workspace:" describe the
298* minimal amount of workspace needed at that point in the code,
299* as well as the preferred amount for good performance.
300* NB refers to the optimal block size for the immediately
301* following subroutine, as returned by ILAENV.)
302*
303 IF( info.EQ.0 ) THEN
304 minwrk = 1
305 maxwrk = 1
306 IF( m.GE.n .AND. minmn.GT.0 ) THEN
307*
308* Compute space needed for DBDSQR
309*
310 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
311 bdspac = 5*n
312* Compute space needed for DGEQRF
313 CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314 lwork_dgeqrf = int( dum(1) )
315* Compute space needed for DORGQR
316 CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317 lwork_dorgqr_n = int( dum(1) )
318 CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319 lwork_dorgqr_m = int( dum(1) )
320* Compute space needed for DGEBRD
321 CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
322 $ dum(1), dum(1), -1, ierr )
323 lwork_dgebrd = int( dum(1) )
324* Compute space needed for DORGBR P
325 CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
326 $ dum(1), -1, ierr )
327 lwork_dorgbr_p = int( dum(1) )
328* Compute space needed for DORGBR Q
329 CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
330 $ dum(1), -1, ierr )
331 lwork_dorgbr_q = int( dum(1) )
332*
333 IF( m.GE.mnthr ) THEN
334 IF( wntun ) THEN
335*
336* Path 1 (M much larger than N, JOBU='N')
337*
338 maxwrk = n + lwork_dgeqrf
339 maxwrk = max( maxwrk, 3*n + lwork_dgebrd )
340 IF( wntvo .OR. wntvas )
341 $ maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
342 maxwrk = max( maxwrk, bdspac )
343 minwrk = max( 4*n, bdspac )
344 ELSE IF( wntuo .AND. wntvn ) THEN
345*
346* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348 wrkbl = n + lwork_dgeqrf
349 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
350 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
351 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
352 wrkbl = max( wrkbl, bdspac )
353 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
354 minwrk = max( 3*n + m, bdspac )
355 ELSE IF( wntuo .AND. wntvas ) THEN
356*
357* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358* 'A')
359*
360 wrkbl = n + lwork_dgeqrf
361 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
362 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
363 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
364 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
365 wrkbl = max( wrkbl, bdspac )
366 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
367 minwrk = max( 3*n + m, bdspac )
368 ELSE IF( wntus .AND. wntvn ) THEN
369*
370* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372 wrkbl = n + lwork_dgeqrf
373 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
374 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
375 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
376 wrkbl = max( wrkbl, bdspac )
377 maxwrk = n*n + wrkbl
378 minwrk = max( 3*n + m, bdspac )
379 ELSE IF( wntus .AND. wntvo ) THEN
380*
381* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383 wrkbl = n + lwork_dgeqrf
384 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
385 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
386 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
387 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
388 wrkbl = max( wrkbl, bdspac )
389 maxwrk = 2*n*n + wrkbl
390 minwrk = max( 3*n + m, bdspac )
391 ELSE IF( wntus .AND. wntvas ) THEN
392*
393* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394* 'A')
395*
396 wrkbl = n + lwork_dgeqrf
397 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
398 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
399 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
400 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
401 wrkbl = max( wrkbl, bdspac )
402 maxwrk = n*n + wrkbl
403 minwrk = max( 3*n + m, bdspac )
404 ELSE IF( wntua .AND. wntvn ) THEN
405*
406* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408 wrkbl = n + lwork_dgeqrf
409 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
410 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
411 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
412 wrkbl = max( wrkbl, bdspac )
413 maxwrk = n*n + wrkbl
414 minwrk = max( 3*n + m, bdspac )
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_dgeqrf
420 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
421 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
422 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
423 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
424 wrkbl = max( wrkbl, bdspac )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = max( 3*n + m, bdspac )
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_dgeqrf
433 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
434 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
435 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
436 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
437 wrkbl = max( wrkbl, bdspac )
438 maxwrk = n*n + wrkbl
439 minwrk = max( 3*n + m, bdspac )
440 END IF
441 ELSE
442*
443* Path 10 (M at least N, but not much larger)
444*
445 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
446 $ dum(1), dum(1), -1, ierr )
447 lwork_dgebrd = int( dum(1) )
448 maxwrk = 3*n + lwork_dgebrd
449 IF( wntus .OR. wntuo ) THEN
450 CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
451 $ dum(1), -1, ierr )
452 lwork_dorgbr_q = int( dum(1) )
453 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
454 END IF
455 IF( wntua ) THEN
456 CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
457 $ dum(1), -1, ierr )
458 lwork_dorgbr_q = int( dum(1) )
459 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
460 END IF
461 IF( .NOT.wntvn ) THEN
462 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
463 END IF
464 maxwrk = max( maxwrk, bdspac )
465 minwrk = max( 3*n + m, bdspac )
466 END IF
467 ELSE IF( minmn.GT.0 ) THEN
468*
469* Compute space needed for DBDSQR
470*
471 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
472 bdspac = 5*m
473* Compute space needed for DGELQF
474 CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475 lwork_dgelqf = int( dum(1) )
476* Compute space needed for DORGLQ
477 CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478 lwork_dorglq_n = int( dum(1) )
479 CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_dorglq_m = int( dum(1) )
481* Compute space needed for DGEBRD
482 CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_dgebrd = int( dum(1) )
485* Compute space needed for DORGBR P
486 CALL dorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_dorgbr_p = int( dum(1) )
489* Compute space needed for DORGBR Q
490 CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_dorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_dgelqf
499 maxwrk = max( maxwrk, 3*m + lwork_dgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_dgelqf
509 wrkbl = max( wrkbl, m + lwork_dorglq_m )
510 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
511 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
514 minwrk = max( 3*m + n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_dgelqf
521 wrkbl = max( wrkbl, m + lwork_dorglq_m )
522 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
523 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
524 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
527 minwrk = max( 3*m + n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_dgelqf
533 wrkbl = max( wrkbl, m + lwork_dorglq_m )
534 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
535 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m + n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_dgelqf
544 wrkbl = max( wrkbl, m + lwork_dorglq_m )
545 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
546 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
547 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m + n, bdspac )
551 ELSE IF( wntvs .AND. wntuas ) THEN
552*
553* Path 6t(N much larger than M, JOBU='S' or 'A',
554* JOBVT='S')
555*
556 wrkbl = m + lwork_dgelqf
557 wrkbl = max( wrkbl, m + lwork_dorglq_m )
558 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
559 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
560 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
561 wrkbl = max( wrkbl, bdspac )
562 maxwrk = m*m + wrkbl
563 minwrk = max( 3*m + n, bdspac )
564 ELSE IF( wntva .AND. wntun ) THEN
565*
566* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
567*
568 wrkbl = m + lwork_dgelqf
569 wrkbl = max( wrkbl, m + lwork_dorglq_n )
570 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
571 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
572 wrkbl = max( wrkbl, bdspac )
573 maxwrk = m*m + wrkbl
574 minwrk = max( 3*m + n, bdspac )
575 ELSE IF( wntva .AND. wntuo ) THEN
576*
577* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
578*
579 wrkbl = m + lwork_dgelqf
580 wrkbl = max( wrkbl, m + lwork_dorglq_n )
581 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
582 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
583 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
584 wrkbl = max( wrkbl, bdspac )
585 maxwrk = 2*m*m + wrkbl
586 minwrk = max( 3*m + n, bdspac )
587 ELSE IF( wntva .AND. wntuas ) THEN
588*
589* Path 9t(N much larger than M, JOBU='S' or 'A',
590* JOBVT='A')
591*
592 wrkbl = m + lwork_dgelqf
593 wrkbl = max( wrkbl, m + lwork_dorglq_n )
594 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
595 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
596 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
597 wrkbl = max( wrkbl, bdspac )
598 maxwrk = m*m + wrkbl
599 minwrk = max( 3*m + n, bdspac )
600 END IF
601 ELSE
602*
603* Path 10t(N greater than M, but not much larger)
604*
605 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
606 $ dum(1), dum(1), -1, ierr )
607 lwork_dgebrd = int( dum(1) )
608 maxwrk = 3*m + lwork_dgebrd
609 IF( wntvs .OR. wntvo ) THEN
610* Compute space needed for DORGBR P
611 CALL dorgbr( 'P', m, n, m, a, n, dum(1),
612 $ dum(1), -1, ierr )
613 lwork_dorgbr_p = int( dum(1) )
614 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
615 END IF
616 IF( wntva ) THEN
617 CALL dorgbr( 'P', n, n, m, a, n, dum(1),
618 $ dum(1), -1, ierr )
619 lwork_dorgbr_p = int( dum(1) )
620 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
621 END IF
622 IF( .NOT.wntun ) THEN
623 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
624 END IF
625 maxwrk = max( maxwrk, bdspac )
626 minwrk = max( 3*m + n, bdspac )
627 END IF
628 END IF
629 maxwrk = max( maxwrk, minwrk )
630 work( 1 ) = maxwrk
631*
632 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
633 info = -13
634 END IF
635 END IF
636*
637 IF( info.NE.0 ) THEN
638 CALL xerbla( 'DGESVD', -info )
639 RETURN
640 ELSE IF( lquery ) THEN
641 RETURN
642 END IF
643*
644* Quick return if possible
645*
646 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
647 RETURN
648 END IF
649*
650* Get machine constants
651*
652 eps = dlamch( 'P' )
653 smlnum = sqrt( dlamch( 'S' ) ) / eps
654 bignum = one / smlnum
655*
656* Scale A if max element outside range [SMLNUM,BIGNUM]
657*
658 anrm = dlange( 'M', m, n, a, lda, dum )
659 iscl = 0
660 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
661 iscl = 1
662 CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
663 ELSE IF( anrm.GT.bignum ) THEN
664 iscl = 1
665 CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
666 END IF
667*
668 IF( m.GE.n ) THEN
669*
670* A has at least as many rows as columns. If A has sufficiently
671* more rows than columns, first reduce using the QR
672* decomposition (if sufficient workspace available)
673*
674 IF( m.GE.mnthr ) THEN
675*
676 IF( wntun ) THEN
677*
678* Path 1 (M much larger than N, JOBU='N')
679* No left singular vectors to be computed
680*
681 itau = 1
682 iwork = itau + n
683*
684* Compute A=Q*R
685* (Workspace: need 2*N, prefer N + N*NB)
686*
687 CALL dgeqrf( m, n, a, lda, work( itau ), work( iwork ),
688 $ lwork-iwork+1, ierr )
689*
690* Zero out below R
691*
692 IF( n .GT. 1 ) THEN
693 CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
694 $ lda )
695 END IF
696 ie = 1
697 itauq = ie + n
698 itaup = itauq + n
699 iwork = itaup + n
700*
701* Bidiagonalize R in A
702* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
703*
704 CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
705 $ work( itaup ), work( iwork ), lwork-iwork+1,
706 $ ierr )
707 ncvt = 0
708 IF( wntvo .OR. wntvas ) THEN
709*
710* If right singular vectors desired, generate P'.
711* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
712*
713 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
714 $ work( iwork ), lwork-iwork+1, ierr )
715 ncvt = n
716 END IF
717 iwork = ie + n
718*
719* Perform bidiagonal QR iteration, computing right
720* singular vectors of A in A if desired
721* (Workspace: need BDSPAC)
722*
723 CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
724 $ dum, 1, dum, 1, work( iwork ), info )
725*
726* If right singular vectors desired in VT, copy them there
727*
728 IF( wntvas )
729 $ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
730*
731 ELSE IF( wntuo .AND. wntvn ) THEN
732*
733* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734* N left singular vectors to be overwritten on A and
735* no right singular vectors to be computed
736*
737 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
738*
739* Sufficient workspace for a fast algorithm
740*
741 ir = 1
742 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
743*
744* WORK(IU) is LDA by N, WORK(IR) is LDA by N
745*
746 ldwrku = lda
747 ldwrkr = lda
748 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
749*
750* WORK(IU) is LDA by N, WORK(IR) is N by N
751*
752 ldwrku = lda
753 ldwrkr = n
754 ELSE
755*
756* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757*
758 ldwrku = ( lwork-n*n-n ) / n
759 ldwrkr = n
760 END IF
761 itau = ir + ldwrkr*n
762 iwork = itau + n
763*
764* Compute A=Q*R
765* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
766*
767 CALL dgeqrf( m, n, a, lda, work( itau ),
768 $ work( iwork ), lwork-iwork+1, ierr )
769*
770* Copy R to WORK(IR) and zero out below it
771*
772 CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
773 CALL dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
774 $ ldwrkr )
775*
776* Generate Q in A
777* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
778*
779 CALL dorgqr( m, n, n, a, lda, work( itau ),
780 $ work( iwork ), lwork-iwork+1, ierr )
781 ie = itau
782 itauq = ie + n
783 itaup = itauq + n
784 iwork = itaup + n
785*
786* Bidiagonalize R in WORK(IR)
787* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
788*
789 CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
790 $ work( itauq ), work( itaup ),
791 $ work( iwork ), lwork-iwork+1, ierr )
792*
793* Generate left vectors bidiagonalizing R
794* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
795*
796 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
797 $ work( itauq ), work( iwork ),
798 $ lwork-iwork+1, ierr )
799 iwork = ie + n
800*
801* Perform bidiagonal QR iteration, computing left
802* singular vectors of R in WORK(IR)
803* (Workspace: need N*N + BDSPAC)
804*
805 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
806 $ work( ir ), ldwrkr, dum, 1,
807 $ work( iwork ), info )
808 iu = ie + n
809*
810* Multiply Q in A by left singular vectors of R in
811* WORK(IR), storing result in WORK(IU) and copying to A
812* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
813*
814 DO 10 i = 1, m, ldwrku
815 chunk = min( m-i+1, ldwrku )
816 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
817 $ lda, work( ir ), ldwrkr, zero,
818 $ work( iu ), ldwrku )
819 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
820 $ a( i, 1 ), lda )
821 10 CONTINUE
822*
823 ELSE
824*
825* Insufficient workspace for a fast algorithm
826*
827 ie = 1
828 itauq = ie + n
829 itaup = itauq + n
830 iwork = itaup + n
831*
832* Bidiagonalize A
833* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
834*
835 CALL dgebrd( m, n, a, lda, s, work( ie ),
836 $ work( itauq ), work( itaup ),
837 $ work( iwork ), lwork-iwork+1, ierr )
838*
839* Generate left vectors bidiagonalizing A
840* (Workspace: need 4*N, prefer 3*N + N*NB)
841*
842 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
843 $ work( iwork ), lwork-iwork+1, ierr )
844 iwork = ie + n
845*
846* Perform bidiagonal QR iteration, computing left
847* singular vectors of A in A
848* (Workspace: need BDSPAC)
849*
850 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
851 $ a, lda, dum, 1, work( iwork ), info )
852*
853 END IF
854*
855 ELSE IF( wntuo .AND. wntvas ) THEN
856*
857* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858* N left singular vectors to be overwritten on A and
859* N right singular vectors to be computed in VT
860*
861 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
862*
863* Sufficient workspace for a fast algorithm
864*
865 ir = 1
866 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
867*
868* WORK(IU) is LDA by N and WORK(IR) is LDA by N
869*
870 ldwrku = lda
871 ldwrkr = lda
872 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
873*
874* WORK(IU) is LDA by N and WORK(IR) is N by N
875*
876 ldwrku = lda
877 ldwrkr = n
878 ELSE
879*
880* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881*
882 ldwrku = ( lwork-n*n-n ) / n
883 ldwrkr = n
884 END IF
885 itau = ir + ldwrkr*n
886 iwork = itau + n
887*
888* Compute A=Q*R
889* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
890*
891 CALL dgeqrf( m, n, a, lda, work( itau ),
892 $ work( iwork ), lwork-iwork+1, ierr )
893*
894* Copy R to VT, zeroing out below it
895*
896 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
897 IF( n.GT.1 )
898 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
899 $ vt( 2, 1 ), ldvt )
900*
901* Generate Q in A
902* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
903*
904 CALL dorgqr( m, n, n, a, lda, work( itau ),
905 $ work( iwork ), lwork-iwork+1, ierr )
906 ie = itau
907 itauq = ie + n
908 itaup = itauq + n
909 iwork = itaup + n
910*
911* Bidiagonalize R in VT, copying result to WORK(IR)
912* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
913*
914 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
915 $ work( itauq ), work( itaup ),
916 $ work( iwork ), lwork-iwork+1, ierr )
917 CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
918*
919* Generate left vectors bidiagonalizing R in WORK(IR)
920* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
921*
922 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
923 $ work( itauq ), work( iwork ),
924 $ lwork-iwork+1, ierr )
925*
926* Generate right vectors bidiagonalizing R in VT
927* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
928*
929 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
930 $ work( iwork ), lwork-iwork+1, ierr )
931 iwork = ie + n
932*
933* Perform bidiagonal QR iteration, computing left
934* singular vectors of R in WORK(IR) and computing right
935* singular vectors of R in VT
936* (Workspace: need N*N + BDSPAC)
937*
938 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
939 $ work( ir ), ldwrkr, dum, 1,
940 $ work( iwork ), info )
941 iu = ie + n
942*
943* Multiply Q in A by left singular vectors of R in
944* WORK(IR), storing result in WORK(IU) and copying to A
945* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
946*
947 DO 20 i = 1, m, ldwrku
948 chunk = min( m-i+1, ldwrku )
949 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
950 $ lda, work( ir ), ldwrkr, zero,
951 $ work( iu ), ldwrku )
952 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
953 $ a( i, 1 ), lda )
954 20 CONTINUE
955*
956 ELSE
957*
958* Insufficient workspace for a fast algorithm
959*
960 itau = 1
961 iwork = itau + n
962*
963* Compute A=Q*R
964* (Workspace: need 2*N, prefer N + N*NB)
965*
966 CALL dgeqrf( m, n, a, lda, work( itau ),
967 $ work( iwork ), lwork-iwork+1, ierr )
968*
969* Copy R to VT, zeroing out below it
970*
971 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
972 IF( n.GT.1 )
973 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
974 $ vt( 2, 1 ), ldvt )
975*
976* Generate Q in A
977* (Workspace: need 2*N, prefer N + N*NB)
978*
979 CALL dorgqr( m, n, n, a, lda, work( itau ),
980 $ work( iwork ), lwork-iwork+1, ierr )
981 ie = itau
982 itauq = ie + n
983 itaup = itauq + n
984 iwork = itaup + n
985*
986* Bidiagonalize R in VT
987* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
988*
989 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
990 $ work( itauq ), work( itaup ),
991 $ work( iwork ), lwork-iwork+1, ierr )
992*
993* Multiply Q in A by left vectors bidiagonalizing R
994* (Workspace: need 3*N + M, prefer 3*N + M*NB)
995*
996 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
997 $ work( itauq ), a, lda, work( iwork ),
998 $ lwork-iwork+1, ierr )
999*
1000* Generate right vectors bidiagonalizing R in VT
1001* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1002*
1003 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1004 $ work( iwork ), lwork-iwork+1, ierr )
1005 iwork = ie + n
1006*
1007* Perform bidiagonal QR iteration, computing left
1008* singular vectors of A in A and computing right
1009* singular vectors of A in VT
1010* (Workspace: need BDSPAC)
1011*
1012 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1013 $ a, lda, dum, 1, work( iwork ), info )
1014*
1015 END IF
1016*
1017 ELSE IF( wntus ) THEN
1018*
1019 IF( wntvn ) THEN
1020*
1021* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022* N left singular vectors to be computed in U and
1023* no right singular vectors to be computed
1024*
1025 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1026*
1027* Sufficient workspace for a fast algorithm
1028*
1029 ir = 1
1030 IF( lwork.GE.wrkbl+lda*n ) THEN
1031*
1032* WORK(IR) is LDA by N
1033*
1034 ldwrkr = lda
1035 ELSE
1036*
1037* WORK(IR) is N by N
1038*
1039 ldwrkr = n
1040 END IF
1041 itau = ir + ldwrkr*n
1042 iwork = itau + n
1043*
1044* Compute A=Q*R
1045* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1046*
1047 CALL dgeqrf( m, n, a, lda, work( itau ),
1048 $ work( iwork ), lwork-iwork+1, ierr )
1049*
1050* Copy R to WORK(IR), zeroing out below it
1051*
1052 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1053 $ ldwrkr )
1054 CALL dlaset( 'L', n-1, n-1, zero, zero,
1055 $ work( ir+1 ), ldwrkr )
1056*
1057* Generate Q in A
1058* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1059*
1060 CALL dorgqr( m, n, n, a, lda, work( itau ),
1061 $ work( iwork ), lwork-iwork+1, ierr )
1062 ie = itau
1063 itauq = ie + n
1064 itaup = itauq + n
1065 iwork = itaup + n
1066*
1067* Bidiagonalize R in WORK(IR)
1068* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1069*
1070 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1071 $ work( ie ), work( itauq ),
1072 $ work( itaup ), work( iwork ),
1073 $ lwork-iwork+1, ierr )
1074*
1075* Generate left vectors bidiagonalizing R in WORK(IR)
1076* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1077*
1078 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1079 $ work( itauq ), work( iwork ),
1080 $ lwork-iwork+1, ierr )
1081 iwork = ie + n
1082*
1083* Perform bidiagonal QR iteration, computing left
1084* singular vectors of R in WORK(IR)
1085* (Workspace: need N*N + BDSPAC)
1086*
1087 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1088 $ 1, work( ir ), ldwrkr, dum, 1,
1089 $ work( iwork ), info )
1090*
1091* Multiply Q in A by left singular vectors of R in
1092* WORK(IR), storing result in U
1093* (Workspace: need N*N)
1094*
1095 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1096 $ work( ir ), ldwrkr, zero, u, ldu )
1097*
1098 ELSE
1099*
1100* Insufficient workspace for a fast algorithm
1101*
1102 itau = 1
1103 iwork = itau + n
1104*
1105* Compute A=Q*R, copying result to U
1106* (Workspace: need 2*N, prefer N + N*NB)
1107*
1108 CALL dgeqrf( m, n, a, lda, work( itau ),
1109 $ work( iwork ), lwork-iwork+1, ierr )
1110 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1111*
1112* Generate Q in U
1113* (Workspace: need 2*N, prefer N + N*NB)
1114*
1115 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1116 $ work( iwork ), lwork-iwork+1, ierr )
1117 ie = itau
1118 itauq = ie + n
1119 itaup = itauq + n
1120 iwork = itaup + n
1121*
1122* Zero out below R in A
1123*
1124 IF( n .GT. 1 ) THEN
1125 CALL dlaset( 'L', n-1, n-1, zero, zero,
1126 $ a( 2, 1 ), lda )
1127 END IF
1128*
1129* Bidiagonalize R in A
1130* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1131*
1132 CALL dgebrd( n, n, a, lda, s, work( ie ),
1133 $ work( itauq ), work( itaup ),
1134 $ work( iwork ), lwork-iwork+1, ierr )
1135*
1136* Multiply Q in U by left vectors bidiagonalizing R
1137* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1138*
1139 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1140 $ work( itauq ), u, ldu, work( iwork ),
1141 $ lwork-iwork+1, ierr )
1142 iwork = ie + n
1143*
1144* Perform bidiagonal QR iteration, computing left
1145* singular vectors of A in U
1146* (Workspace: need BDSPAC)
1147*
1148 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1149 $ 1, u, ldu, dum, 1, work( iwork ),
1150 $ info )
1151*
1152 END IF
1153*
1154 ELSE IF( wntvo ) THEN
1155*
1156* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1157* N left singular vectors to be computed in U and
1158* N right singular vectors to be overwritten on A
1159*
1160 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1161*
1162* Sufficient workspace for a fast algorithm
1163*
1164 iu = 1
1165 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1166*
1167* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1168*
1169 ldwrku = lda
1170 ir = iu + ldwrku*n
1171 ldwrkr = lda
1172 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1173*
1174* WORK(IU) is LDA by N and WORK(IR) is N by N
1175*
1176 ldwrku = lda
1177 ir = iu + ldwrku*n
1178 ldwrkr = n
1179 ELSE
1180*
1181* WORK(IU) is N by N and WORK(IR) is N by N
1182*
1183 ldwrku = n
1184 ir = iu + ldwrku*n
1185 ldwrkr = n
1186 END IF
1187 itau = ir + ldwrkr*n
1188 iwork = itau + n
1189*
1190* Compute A=Q*R
1191* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1192*
1193 CALL dgeqrf( m, n, a, lda, work( itau ),
1194 $ work( iwork ), lwork-iwork+1, ierr )
1195*
1196* Copy R to WORK(IU), zeroing out below it
1197*
1198 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1199 $ ldwrku )
1200 CALL dlaset( 'L', n-1, n-1, zero, zero,
1201 $ work( iu+1 ), ldwrku )
1202*
1203* Generate Q in A
1204* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1205*
1206 CALL dorgqr( m, n, n, a, lda, work( itau ),
1207 $ work( iwork ), lwork-iwork+1, ierr )
1208 ie = itau
1209 itauq = ie + n
1210 itaup = itauq + n
1211 iwork = itaup + n
1212*
1213* Bidiagonalize R in WORK(IU), copying result to
1214* WORK(IR)
1215* (Workspace: need 2*N*N + 4*N,
1216* prefer 2*N*N+3*N+2*N*NB)
1217*
1218 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1219 $ work( ie ), work( itauq ),
1220 $ work( itaup ), work( iwork ),
1221 $ lwork-iwork+1, ierr )
1222 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1223 $ work( ir ), ldwrkr )
1224*
1225* Generate left bidiagonalizing vectors in WORK(IU)
1226* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1227*
1228 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1229 $ work( itauq ), work( iwork ),
1230 $ lwork-iwork+1, ierr )
1231*
1232* Generate right bidiagonalizing vectors in WORK(IR)
1233* (Workspace: need 2*N*N + 4*N-1,
1234* prefer 2*N*N+3*N+(N-1)*NB)
1235*
1236 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1237 $ work( itaup ), work( iwork ),
1238 $ lwork-iwork+1, ierr )
1239 iwork = ie + n
1240*
1241* Perform bidiagonal QR iteration, computing left
1242* singular vectors of R in WORK(IU) and computing
1243* right singular vectors of R in WORK(IR)
1244* (Workspace: need 2*N*N + BDSPAC)
1245*
1246 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1247 $ work( ir ), ldwrkr, work( iu ),
1248 $ ldwrku, dum, 1, work( iwork ), info )
1249*
1250* Multiply Q in A by left singular vectors of R in
1251* WORK(IU), storing result in U
1252* (Workspace: need N*N)
1253*
1254 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1255 $ work( iu ), ldwrku, zero, u, ldu )
1256*
1257* Copy right singular vectors of R to A
1258* (Workspace: need N*N)
1259*
1260 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1261 $ lda )
1262*
1263 ELSE
1264*
1265* Insufficient workspace for a fast algorithm
1266*
1267 itau = 1
1268 iwork = itau + n
1269*
1270* Compute A=Q*R, copying result to U
1271* (Workspace: need 2*N, prefer N + N*NB)
1272*
1273 CALL dgeqrf( m, n, a, lda, work( itau ),
1274 $ work( iwork ), lwork-iwork+1, ierr )
1275 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1276*
1277* Generate Q in U
1278* (Workspace: need 2*N, prefer N + N*NB)
1279*
1280 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1281 $ work( iwork ), lwork-iwork+1, ierr )
1282 ie = itau
1283 itauq = ie + n
1284 itaup = itauq + n
1285 iwork = itaup + n
1286*
1287* Zero out below R in A
1288*
1289 IF( n .GT. 1 ) THEN
1290 CALL dlaset( 'L', n-1, n-1, zero, zero,
1291 $ a( 2, 1 ), lda )
1292 END IF
1293*
1294* Bidiagonalize R in A
1295* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1296*
1297 CALL dgebrd( n, n, a, lda, s, work( ie ),
1298 $ work( itauq ), work( itaup ),
1299 $ work( iwork ), lwork-iwork+1, ierr )
1300*
1301* Multiply Q in U by left vectors bidiagonalizing R
1302* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1303*
1304 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1305 $ work( itauq ), u, ldu, work( iwork ),
1306 $ lwork-iwork+1, ierr )
1307*
1308* Generate right vectors bidiagonalizing R in A
1309* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1310*
1311 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1312 $ work( iwork ), lwork-iwork+1, ierr )
1313 iwork = ie + n
1314*
1315* Perform bidiagonal QR iteration, computing left
1316* singular vectors of A in U and computing right
1317* singular vectors of A in A
1318* (Workspace: need BDSPAC)
1319*
1320 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1321 $ lda, u, ldu, dum, 1, work( iwork ),
1322 $ info )
1323*
1324 END IF
1325*
1326 ELSE IF( wntvas ) THEN
1327*
1328* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1329* or 'A')
1330* N left singular vectors to be computed in U and
1331* N right singular vectors to be computed in VT
1332*
1333 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1334*
1335* Sufficient workspace for a fast algorithm
1336*
1337 iu = 1
1338 IF( lwork.GE.wrkbl+lda*n ) THEN
1339*
1340* WORK(IU) is LDA by N
1341*
1342 ldwrku = lda
1343 ELSE
1344*
1345* WORK(IU) is N by N
1346*
1347 ldwrku = n
1348 END IF
1349 itau = iu + ldwrku*n
1350 iwork = itau + n
1351*
1352* Compute A=Q*R
1353* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1354*
1355 CALL dgeqrf( m, n, a, lda, work( itau ),
1356 $ work( iwork ), lwork-iwork+1, ierr )
1357*
1358* Copy R to WORK(IU), zeroing out below it
1359*
1360 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1361 $ ldwrku )
1362 CALL dlaset( 'L', n-1, n-1, zero, zero,
1363 $ work( iu+1 ), ldwrku )
1364*
1365* Generate Q in A
1366* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1367*
1368 CALL dorgqr( m, n, n, a, lda, work( itau ),
1369 $ work( iwork ), lwork-iwork+1, ierr )
1370 ie = itau
1371 itauq = ie + n
1372 itaup = itauq + n
1373 iwork = itaup + n
1374*
1375* Bidiagonalize R in WORK(IU), copying result to VT
1376* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1377*
1378 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1379 $ work( ie ), work( itauq ),
1380 $ work( itaup ), work( iwork ),
1381 $ lwork-iwork+1, ierr )
1382 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1383 $ ldvt )
1384*
1385* Generate left bidiagonalizing vectors in WORK(IU)
1386* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1387*
1388 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1389 $ work( itauq ), work( iwork ),
1390 $ lwork-iwork+1, ierr )
1391*
1392* Generate right bidiagonalizing vectors in VT
1393* (Workspace: need N*N + 4*N-1,
1394* prefer N*N+3*N+(N-1)*NB)
1395*
1396 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1397 $ work( iwork ), lwork-iwork+1, ierr )
1398 iwork = ie + n
1399*
1400* Perform bidiagonal QR iteration, computing left
1401* singular vectors of R in WORK(IU) and computing
1402* right singular vectors of R in VT
1403* (Workspace: need N*N + BDSPAC)
1404*
1405 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1406 $ ldvt, work( iu ), ldwrku, dum, 1,
1407 $ work( iwork ), info )
1408*
1409* Multiply Q in A by left singular vectors of R in
1410* WORK(IU), storing result in U
1411* (Workspace: need N*N)
1412*
1413 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1414 $ work( iu ), ldwrku, zero, u, ldu )
1415*
1416 ELSE
1417*
1418* Insufficient workspace for a fast algorithm
1419*
1420 itau = 1
1421 iwork = itau + n
1422*
1423* Compute A=Q*R, copying result to U
1424* (Workspace: need 2*N, prefer N + N*NB)
1425*
1426 CALL dgeqrf( m, n, a, lda, work( itau ),
1427 $ work( iwork ), lwork-iwork+1, ierr )
1428 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1429*
1430* Generate Q in U
1431* (Workspace: need 2*N, prefer N + N*NB)
1432*
1433 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1434 $ work( iwork ), lwork-iwork+1, ierr )
1435*
1436* Copy R to VT, zeroing out below it
1437*
1438 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1439 IF( n.GT.1 )
1440 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1441 $ vt( 2, 1 ), ldvt )
1442 ie = itau
1443 itauq = ie + n
1444 itaup = itauq + n
1445 iwork = itaup + n
1446*
1447* Bidiagonalize R in VT
1448* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1449*
1450 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1451 $ work( itauq ), work( itaup ),
1452 $ work( iwork ), lwork-iwork+1, ierr )
1453*
1454* Multiply Q in U by left bidiagonalizing vectors
1455* in VT
1456* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1457*
1458 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1459 $ work( itauq ), u, ldu, work( iwork ),
1460 $ lwork-iwork+1, ierr )
1461*
1462* Generate right bidiagonalizing vectors in VT
1463* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1464*
1465 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1466 $ work( iwork ), lwork-iwork+1, ierr )
1467 iwork = ie + n
1468*
1469* Perform bidiagonal QR iteration, computing left
1470* singular vectors of A in U and computing right
1471* singular vectors of A in VT
1472* (Workspace: need BDSPAC)
1473*
1474 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1475 $ ldvt, u, ldu, dum, 1, work( iwork ),
1476 $ info )
1477*
1478 END IF
1479*
1480 END IF
1481*
1482 ELSE IF( wntua ) THEN
1483*
1484 IF( wntvn ) THEN
1485*
1486* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1487* M left singular vectors to be computed in U and
1488* no right singular vectors to be computed
1489*
1490 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1491*
1492* Sufficient workspace for a fast algorithm
1493*
1494 ir = 1
1495 IF( lwork.GE.wrkbl+lda*n ) THEN
1496*
1497* WORK(IR) is LDA by N
1498*
1499 ldwrkr = lda
1500 ELSE
1501*
1502* WORK(IR) is N by N
1503*
1504 ldwrkr = n
1505 END IF
1506 itau = ir + ldwrkr*n
1507 iwork = itau + n
1508*
1509* Compute A=Q*R, copying result to U
1510* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1511*
1512 CALL dgeqrf( m, n, a, lda, work( itau ),
1513 $ work( iwork ), lwork-iwork+1, ierr )
1514 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1515*
1516* Copy R to WORK(IR), zeroing out below it
1517*
1518 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1519 $ ldwrkr )
1520 CALL dlaset( 'L', n-1, n-1, zero, zero,
1521 $ work( ir+1 ), ldwrkr )
1522*
1523* Generate Q in U
1524* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1525*
1526 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1527 $ work( iwork ), lwork-iwork+1, ierr )
1528 ie = itau
1529 itauq = ie + n
1530 itaup = itauq + n
1531 iwork = itaup + n
1532*
1533* Bidiagonalize R in WORK(IR)
1534* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1535*
1536 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1537 $ work( ie ), work( itauq ),
1538 $ work( itaup ), work( iwork ),
1539 $ lwork-iwork+1, ierr )
1540*
1541* Generate left bidiagonalizing vectors in WORK(IR)
1542* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1543*
1544 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1545 $ work( itauq ), work( iwork ),
1546 $ lwork-iwork+1, ierr )
1547 iwork = ie + n
1548*
1549* Perform bidiagonal QR iteration, computing left
1550* singular vectors of R in WORK(IR)
1551* (Workspace: need N*N + BDSPAC)
1552*
1553 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1554 $ 1, work( ir ), ldwrkr, dum, 1,
1555 $ work( iwork ), info )
1556*
1557* Multiply Q in U by left singular vectors of R in
1558* WORK(IR), storing result in A
1559* (Workspace: need N*N)
1560*
1561 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1562 $ work( ir ), ldwrkr, zero, a, lda )
1563*
1564* Copy left singular vectors of A from A to U
1565*
1566 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1567*
1568 ELSE
1569*
1570* Insufficient workspace for a fast algorithm
1571*
1572 itau = 1
1573 iwork = itau + n
1574*
1575* Compute A=Q*R, copying result to U
1576* (Workspace: need 2*N, prefer N + N*NB)
1577*
1578 CALL dgeqrf( m, n, a, lda, work( itau ),
1579 $ work( iwork ), lwork-iwork+1, ierr )
1580 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1581*
1582* Generate Q in U
1583* (Workspace: need N + M, prefer N + M*NB)
1584*
1585 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1586 $ work( iwork ), lwork-iwork+1, ierr )
1587 ie = itau
1588 itauq = ie + n
1589 itaup = itauq + n
1590 iwork = itaup + n
1591*
1592* Zero out below R in A
1593*
1594 IF( n .GT. 1 ) THEN
1595 CALL dlaset( 'L', n-1, n-1, zero, zero,
1596 $ a( 2, 1 ), lda )
1597 END IF
1598*
1599* Bidiagonalize R in A
1600* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1601*
1602 CALL dgebrd( n, n, a, lda, s, work( ie ),
1603 $ work( itauq ), work( itaup ),
1604 $ work( iwork ), lwork-iwork+1, ierr )
1605*
1606* Multiply Q in U by left bidiagonalizing vectors
1607* in A
1608* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1609*
1610 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1611 $ work( itauq ), u, ldu, work( iwork ),
1612 $ lwork-iwork+1, ierr )
1613 iwork = ie + n
1614*
1615* Perform bidiagonal QR iteration, computing left
1616* singular vectors of A in U
1617* (Workspace: need BDSPAC)
1618*
1619 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1620 $ 1, u, ldu, dum, 1, work( iwork ),
1621 $ info )
1622*
1623 END IF
1624*
1625 ELSE IF( wntvo ) THEN
1626*
1627* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1628* M left singular vectors to be computed in U and
1629* N right singular vectors to be overwritten on A
1630*
1631 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1632*
1633* Sufficient workspace for a fast algorithm
1634*
1635 iu = 1
1636 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1637*
1638* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1639*
1640 ldwrku = lda
1641 ir = iu + ldwrku*n
1642 ldwrkr = lda
1643 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1644*
1645* WORK(IU) is LDA by N and WORK(IR) is N by N
1646*
1647 ldwrku = lda
1648 ir = iu + ldwrku*n
1649 ldwrkr = n
1650 ELSE
1651*
1652* WORK(IU) is N by N and WORK(IR) is N by N
1653*
1654 ldwrku = n
1655 ir = iu + ldwrku*n
1656 ldwrkr = n
1657 END IF
1658 itau = ir + ldwrkr*n
1659 iwork = itau + n
1660*
1661* Compute A=Q*R, copying result to U
1662* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1663*
1664 CALL dgeqrf( m, n, a, lda, work( itau ),
1665 $ work( iwork ), lwork-iwork+1, ierr )
1666 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1667*
1668* Generate Q in U
1669* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1670*
1671 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1672 $ work( iwork ), lwork-iwork+1, ierr )
1673*
1674* Copy R to WORK(IU), zeroing out below it
1675*
1676 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1677 $ ldwrku )
1678 CALL dlaset( 'L', n-1, n-1, zero, zero,
1679 $ work( iu+1 ), ldwrku )
1680 ie = itau
1681 itauq = ie + n
1682 itaup = itauq + n
1683 iwork = itaup + n
1684*
1685* Bidiagonalize R in WORK(IU), copying result to
1686* WORK(IR)
1687* (Workspace: need 2*N*N + 4*N,
1688* prefer 2*N*N+3*N+2*N*NB)
1689*
1690 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1691 $ work( ie ), work( itauq ),
1692 $ work( itaup ), work( iwork ),
1693 $ lwork-iwork+1, ierr )
1694 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1695 $ work( ir ), ldwrkr )
1696*
1697* Generate left bidiagonalizing vectors in WORK(IU)
1698* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1699*
1700 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1701 $ work( itauq ), work( iwork ),
1702 $ lwork-iwork+1, ierr )
1703*
1704* Generate right bidiagonalizing vectors in WORK(IR)
1705* (Workspace: need 2*N*N + 4*N-1,
1706* prefer 2*N*N+3*N+(N-1)*NB)
1707*
1708 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1709 $ work( itaup ), work( iwork ),
1710 $ lwork-iwork+1, ierr )
1711 iwork = ie + n
1712*
1713* Perform bidiagonal QR iteration, computing left
1714* singular vectors of R in WORK(IU) and computing
1715* right singular vectors of R in WORK(IR)
1716* (Workspace: need 2*N*N + BDSPAC)
1717*
1718 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1719 $ work( ir ), ldwrkr, work( iu ),
1720 $ ldwrku, dum, 1, work( iwork ), info )
1721*
1722* Multiply Q in U by left singular vectors of R in
1723* WORK(IU), storing result in A
1724* (Workspace: need N*N)
1725*
1726 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1727 $ work( iu ), ldwrku, zero, a, lda )
1728*
1729* Copy left singular vectors of A from A to U
1730*
1731 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1732*
1733* Copy right singular vectors of R from WORK(IR) to A
1734*
1735 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1736 $ lda )
1737*
1738 ELSE
1739*
1740* Insufficient workspace for a fast algorithm
1741*
1742 itau = 1
1743 iwork = itau + n
1744*
1745* Compute A=Q*R, copying result to U
1746* (Workspace: need 2*N, prefer N + N*NB)
1747*
1748 CALL dgeqrf( m, n, a, lda, work( itau ),
1749 $ work( iwork ), lwork-iwork+1, ierr )
1750 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1751*
1752* Generate Q in U
1753* (Workspace: need N + M, prefer N + M*NB)
1754*
1755 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1756 $ work( iwork ), lwork-iwork+1, ierr )
1757 ie = itau
1758 itauq = ie + n
1759 itaup = itauq + n
1760 iwork = itaup + n
1761*
1762* Zero out below R in A
1763*
1764 IF( n .GT. 1 ) THEN
1765 CALL dlaset( 'L', n-1, n-1, zero, zero,
1766 $ a( 2, 1 ), lda )
1767 END IF
1768*
1769* Bidiagonalize R in A
1770* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1771*
1772 CALL dgebrd( n, n, a, lda, s, work( ie ),
1773 $ work( itauq ), work( itaup ),
1774 $ work( iwork ), lwork-iwork+1, ierr )
1775*
1776* Multiply Q in U by left bidiagonalizing vectors
1777* in A
1778* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1779*
1780 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1781 $ work( itauq ), u, ldu, work( iwork ),
1782 $ lwork-iwork+1, ierr )
1783*
1784* Generate right bidiagonalizing vectors in A
1785* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1786*
1787 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1788 $ work( iwork ), lwork-iwork+1, ierr )
1789 iwork = ie + n
1790*
1791* Perform bidiagonal QR iteration, computing left
1792* singular vectors of A in U and computing right
1793* singular vectors of A in A
1794* (Workspace: need BDSPAC)
1795*
1796 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1797 $ lda, u, ldu, dum, 1, work( iwork ),
1798 $ info )
1799*
1800 END IF
1801*
1802 ELSE IF( wntvas ) THEN
1803*
1804* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1805* or 'A')
1806* M left singular vectors to be computed in U and
1807* N right singular vectors to be computed in VT
1808*
1809 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1810*
1811* Sufficient workspace for a fast algorithm
1812*
1813 iu = 1
1814 IF( lwork.GE.wrkbl+lda*n ) THEN
1815*
1816* WORK(IU) is LDA by N
1817*
1818 ldwrku = lda
1819 ELSE
1820*
1821* WORK(IU) is N by N
1822*
1823 ldwrku = n
1824 END IF
1825 itau = iu + ldwrku*n
1826 iwork = itau + n
1827*
1828* Compute A=Q*R, copying result to U
1829* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1830*
1831 CALL dgeqrf( m, n, a, lda, work( itau ),
1832 $ work( iwork ), lwork-iwork+1, ierr )
1833 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1834*
1835* Generate Q in U
1836* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1837*
1838 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1839 $ work( iwork ), lwork-iwork+1, ierr )
1840*
1841* Copy R to WORK(IU), zeroing out below it
1842*
1843 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1844 $ ldwrku )
1845 CALL dlaset( 'L', n-1, n-1, zero, zero,
1846 $ work( iu+1 ), ldwrku )
1847 ie = itau
1848 itauq = ie + n
1849 itaup = itauq + n
1850 iwork = itaup + n
1851*
1852* Bidiagonalize R in WORK(IU), copying result to VT
1853* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1854*
1855 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1856 $ work( ie ), work( itauq ),
1857 $ work( itaup ), work( iwork ),
1858 $ lwork-iwork+1, ierr )
1859 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1860 $ ldvt )
1861*
1862* Generate left bidiagonalizing vectors in WORK(IU)
1863* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1864*
1865 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1866 $ work( itauq ), work( iwork ),
1867 $ lwork-iwork+1, ierr )
1868*
1869* Generate right bidiagonalizing vectors in VT
1870* (Workspace: need N*N + 4*N-1,
1871* prefer N*N+3*N+(N-1)*NB)
1872*
1873 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1874 $ work( iwork ), lwork-iwork+1, ierr )
1875 iwork = ie + n
1876*
1877* Perform bidiagonal QR iteration, computing left
1878* singular vectors of R in WORK(IU) and computing
1879* right singular vectors of R in VT
1880* (Workspace: need N*N + BDSPAC)
1881*
1882 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1883 $ ldvt, work( iu ), ldwrku, dum, 1,
1884 $ work( iwork ), info )
1885*
1886* Multiply Q in U by left singular vectors of R in
1887* WORK(IU), storing result in A
1888* (Workspace: need N*N)
1889*
1890 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1891 $ work( iu ), ldwrku, zero, a, lda )
1892*
1893* Copy left singular vectors of A from A to U
1894*
1895 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1896*
1897 ELSE
1898*
1899* Insufficient workspace for a fast algorithm
1900*
1901 itau = 1
1902 iwork = itau + n
1903*
1904* Compute A=Q*R, copying result to U
1905* (Workspace: need 2*N, prefer N + N*NB)
1906*
1907 CALL dgeqrf( m, n, a, lda, work( itau ),
1908 $ work( iwork ), lwork-iwork+1, ierr )
1909 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1910*
1911* Generate Q in U
1912* (Workspace: need N + M, prefer N + M*NB)
1913*
1914 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1915 $ work( iwork ), lwork-iwork+1, ierr )
1916*
1917* Copy R from A to VT, zeroing out below it
1918*
1919 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1920 IF( n.GT.1 )
1921 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1922 $ vt( 2, 1 ), ldvt )
1923 ie = itau
1924 itauq = ie + n
1925 itaup = itauq + n
1926 iwork = itaup + n
1927*
1928* Bidiagonalize R in VT
1929* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1930*
1931 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1932 $ work( itauq ), work( itaup ),
1933 $ work( iwork ), lwork-iwork+1, ierr )
1934*
1935* Multiply Q in U by left bidiagonalizing vectors
1936* in VT
1937* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1938*
1939 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1940 $ work( itauq ), u, ldu, work( iwork ),
1941 $ lwork-iwork+1, ierr )
1942*
1943* Generate right bidiagonalizing vectors in VT
1944* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1945*
1946 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1947 $ work( iwork ), lwork-iwork+1, ierr )
1948 iwork = ie + n
1949*
1950* Perform bidiagonal QR iteration, computing left
1951* singular vectors of A in U and computing right
1952* singular vectors of A in VT
1953* (Workspace: need BDSPAC)
1954*
1955 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1956 $ ldvt, u, ldu, dum, 1, work( iwork ),
1957 $ info )
1958*
1959 END IF
1960*
1961 END IF
1962*
1963 END IF
1964*
1965 ELSE
1966*
1967* M .LT. MNTHR
1968*
1969* Path 10 (M at least N, but not much larger)
1970* Reduce to bidiagonal form without QR decomposition
1971*
1972 ie = 1
1973 itauq = ie + n
1974 itaup = itauq + n
1975 iwork = itaup + n
1976*
1977* Bidiagonalize A
1978* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
1979*
1980 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1981 $ work( itaup ), work( iwork ), lwork-iwork+1,
1982 $ ierr )
1983 IF( wntuas ) THEN
1984*
1985* If left singular vectors desired in U, copy result to U
1986* and generate left bidiagonalizing vectors in U
1987* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
1988*
1989 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1990 IF( wntus )
1991 $ ncu = n
1992 IF( wntua )
1993 $ ncu = m
1994 CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1995 $ work( iwork ), lwork-iwork+1, ierr )
1996 END IF
1997 IF( wntvas ) THEN
1998*
1999* If right singular vectors desired in VT, copy result to
2000* VT and generate right bidiagonalizing vectors in VT
2001* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2002*
2003 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
2004 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2005 $ work( iwork ), lwork-iwork+1, ierr )
2006 END IF
2007 IF( wntuo ) THEN
2008*
2009* If left singular vectors desired in A, generate left
2010* bidiagonalizing vectors in A
2011* (Workspace: need 4*N, prefer 3*N + N*NB)
2012*
2013 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2014 $ work( iwork ), lwork-iwork+1, ierr )
2015 END IF
2016 IF( wntvo ) THEN
2017*
2018* If right singular vectors desired in A, generate right
2019* bidiagonalizing vectors in A
2020* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2021*
2022 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2023 $ work( iwork ), lwork-iwork+1, ierr )
2024 END IF
2025 iwork = ie + n
2026 IF( wntuas .OR. wntuo )
2027 $ nru = m
2028 IF( wntun )
2029 $ nru = 0
2030 IF( wntvas .OR. wntvo )
2031 $ ncvt = n
2032 IF( wntvn )
2033 $ ncvt = 0
2034 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2035*
2036* Perform bidiagonal QR iteration, if desired, computing
2037* left singular vectors in U and computing right singular
2038* vectors in VT
2039* (Workspace: need BDSPAC)
2040*
2041 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2042 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2043 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2044*
2045* Perform bidiagonal QR iteration, if desired, computing
2046* left singular vectors in U and computing right singular
2047* vectors in A
2048* (Workspace: need BDSPAC)
2049*
2050 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2051 $ u, ldu, dum, 1, work( iwork ), info )
2052 ELSE
2053*
2054* Perform bidiagonal QR iteration, if desired, computing
2055* left singular vectors in A and computing right singular
2056* vectors in VT
2057* (Workspace: need BDSPAC)
2058*
2059 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2060 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2061 END IF
2062*
2063 END IF
2064*
2065 ELSE
2066*
2067* A has more columns than rows. If A has sufficiently more
2068* columns than rows, first reduce using the LQ decomposition (if
2069* sufficient workspace available)
2070*
2071 IF( n.GE.mnthr ) THEN
2072*
2073 IF( wntvn ) THEN
2074*
2075* Path 1t(N much larger than M, JOBVT='N')
2076* No right singular vectors to be computed
2077*
2078 itau = 1
2079 iwork = itau + m
2080*
2081* Compute A=L*Q
2082* (Workspace: need 2*M, prefer M + M*NB)
2083*
2084 CALL dgelqf( m, n, a, lda, work( itau ), work( iwork ),
2085 $ lwork-iwork+1, ierr )
2086*
2087* Zero out above L
2088*
2089 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2090 ie = 1
2091 itauq = ie + m
2092 itaup = itauq + m
2093 iwork = itaup + m
2094*
2095* Bidiagonalize L in A
2096* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2097*
2098 CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2099 $ work( itaup ), work( iwork ), lwork-iwork+1,
2100 $ ierr )
2101 IF( wntuo .OR. wntuas ) THEN
2102*
2103* If left singular vectors desired, generate Q
2104* (Workspace: need 4*M, prefer 3*M + M*NB)
2105*
2106 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2107 $ work( iwork ), lwork-iwork+1, ierr )
2108 END IF
2109 iwork = ie + m
2110 nru = 0
2111 IF( wntuo .OR. wntuas )
2112 $ nru = m
2113*
2114* Perform bidiagonal QR iteration, computing left singular
2115* vectors of A in A if desired
2116* (Workspace: need BDSPAC)
2117*
2118 CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2119 $ lda, dum, 1, work( iwork ), info )
2120*
2121* If left singular vectors desired in U, copy them there
2122*
2123 IF( wntuas )
2124 $ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2125*
2126 ELSE IF( wntvo .AND. wntun ) THEN
2127*
2128* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2129* M right singular vectors to be overwritten on A and
2130* no left singular vectors to be computed
2131*
2132 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2133*
2134* Sufficient workspace for a fast algorithm
2135*
2136 ir = 1
2137 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2138*
2139* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2140*
2141 ldwrku = lda
2142 chunk = n
2143 ldwrkr = lda
2144 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2145*
2146* WORK(IU) is LDA by N and WORK(IR) is M by M
2147*
2148 ldwrku = lda
2149 chunk = n
2150 ldwrkr = m
2151 ELSE
2152*
2153* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2154*
2155 ldwrku = m
2156 chunk = ( lwork-m*m-m ) / m
2157 ldwrkr = m
2158 END IF
2159 itau = ir + ldwrkr*m
2160 iwork = itau + m
2161*
2162* Compute A=L*Q
2163* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2164*
2165 CALL dgelqf( m, n, a, lda, work( itau ),
2166 $ work( iwork ), lwork-iwork+1, ierr )
2167*
2168* Copy L to WORK(IR) and zero out above it
2169*
2170 CALL dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2171 CALL dlaset( 'U', m-1, m-1, zero, zero,
2172 $ work( ir+ldwrkr ), ldwrkr )
2173*
2174* Generate Q in A
2175* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2176*
2177 CALL dorglq( m, n, m, a, lda, work( itau ),
2178 $ work( iwork ), lwork-iwork+1, ierr )
2179 ie = itau
2180 itauq = ie + m
2181 itaup = itauq + m
2182 iwork = itaup + m
2183*
2184* Bidiagonalize L in WORK(IR)
2185* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2186*
2187 CALL dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2188 $ work( itauq ), work( itaup ),
2189 $ work( iwork ), lwork-iwork+1, ierr )
2190*
2191* Generate right vectors bidiagonalizing L
2192* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2193*
2194 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2195 $ work( itaup ), work( iwork ),
2196 $ lwork-iwork+1, ierr )
2197 iwork = ie + m
2198*
2199* Perform bidiagonal QR iteration, computing right
2200* singular vectors of L in WORK(IR)
2201* (Workspace: need M*M + BDSPAC)
2202*
2203 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2204 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2205 $ work( iwork ), info )
2206 iu = ie + m
2207*
2208* Multiply right singular vectors of L in WORK(IR) by Q
2209* in A, storing result in WORK(IU) and copying to A
2210* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2211*
2212 DO 30 i = 1, n, chunk
2213 blk = min( n-i+1, chunk )
2214 CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2215 $ ldwrkr, a( 1, i ), lda, zero,
2216 $ work( iu ), ldwrku )
2217 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2218 $ a( 1, i ), lda )
2219 30 CONTINUE
2220*
2221 ELSE
2222*
2223* Insufficient workspace for a fast algorithm
2224*
2225 ie = 1
2226 itauq = ie + m
2227 itaup = itauq + m
2228 iwork = itaup + m
2229*
2230* Bidiagonalize A
2231* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2232*
2233 CALL dgebrd( m, n, a, lda, s, work( ie ),
2234 $ work( itauq ), work( itaup ),
2235 $ work( iwork ), lwork-iwork+1, ierr )
2236*
2237* Generate right vectors bidiagonalizing A
2238* (Workspace: need 4*M, prefer 3*M + M*NB)
2239*
2240 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2241 $ work( iwork ), lwork-iwork+1, ierr )
2242 iwork = ie + m
2243*
2244* Perform bidiagonal QR iteration, computing right
2245* singular vectors of A in A
2246* (Workspace: need BDSPAC)
2247*
2248 CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2249 $ dum, 1, dum, 1, work( iwork ), info )
2250*
2251 END IF
2252*
2253 ELSE IF( wntvo .AND. wntuas ) THEN
2254*
2255* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2256* M right singular vectors to be overwritten on A and
2257* M left singular vectors to be computed in U
2258*
2259 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2260*
2261* Sufficient workspace for a fast algorithm
2262*
2263 ir = 1
2264 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2265*
2266* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2267*
2268 ldwrku = lda
2269 chunk = n
2270 ldwrkr = lda
2271 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2272*
2273* WORK(IU) is LDA by N and WORK(IR) is M by M
2274*
2275 ldwrku = lda
2276 chunk = n
2277 ldwrkr = m
2278 ELSE
2279*
2280* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2281*
2282 ldwrku = m
2283 chunk = ( lwork-m*m-m ) / m
2284 ldwrkr = m
2285 END IF
2286 itau = ir + ldwrkr*m
2287 iwork = itau + m
2288*
2289* Compute A=L*Q
2290* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2291*
2292 CALL dgelqf( m, n, a, lda, work( itau ),
2293 $ work( iwork ), lwork-iwork+1, ierr )
2294*
2295* Copy L to U, zeroing about above it
2296*
2297 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2298 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2299 $ ldu )
2300*
2301* Generate Q in A
2302* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2303*
2304 CALL dorglq( m, n, m, a, lda, work( itau ),
2305 $ work( iwork ), lwork-iwork+1, ierr )
2306 ie = itau
2307 itauq = ie + m
2308 itaup = itauq + m
2309 iwork = itaup + m
2310*
2311* Bidiagonalize L in U, copying result to WORK(IR)
2312* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2313*
2314 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2315 $ work( itauq ), work( itaup ),
2316 $ work( iwork ), lwork-iwork+1, ierr )
2317 CALL dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2318*
2319* Generate right vectors bidiagonalizing L in WORK(IR)
2320* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2321*
2322 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2323 $ work( itaup ), work( iwork ),
2324 $ lwork-iwork+1, ierr )
2325*
2326* Generate left vectors bidiagonalizing L in U
2327* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2328*
2329 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2330 $ work( iwork ), lwork-iwork+1, ierr )
2331 iwork = ie + m
2332*
2333* Perform bidiagonal QR iteration, computing left
2334* singular vectors of L in U, and computing right
2335* singular vectors of L in WORK(IR)
2336* (Workspace: need M*M + BDSPAC)
2337*
2338 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2339 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2340 $ work( iwork ), info )
2341 iu = ie + m
2342*
2343* Multiply right singular vectors of L in WORK(IR) by Q
2344* in A, storing result in WORK(IU) and copying to A
2345* (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2346*
2347 DO 40 i = 1, n, chunk
2348 blk = min( n-i+1, chunk )
2349 CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2350 $ ldwrkr, a( 1, i ), lda, zero,
2351 $ work( iu ), ldwrku )
2352 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2353 $ a( 1, i ), lda )
2354 40 CONTINUE
2355*
2356 ELSE
2357*
2358* Insufficient workspace for a fast algorithm
2359*
2360 itau = 1
2361 iwork = itau + m
2362*
2363* Compute A=L*Q
2364* (Workspace: need 2*M, prefer M + M*NB)
2365*
2366 CALL dgelqf( m, n, a, lda, work( itau ),
2367 $ work( iwork ), lwork-iwork+1, ierr )
2368*
2369* Copy L to U, zeroing out above it
2370*
2371 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2372 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2373 $ ldu )
2374*
2375* Generate Q in A
2376* (Workspace: need 2*M, prefer M + M*NB)
2377*
2378 CALL dorglq( m, n, m, a, lda, work( itau ),
2379 $ work( iwork ), lwork-iwork+1, ierr )
2380 ie = itau
2381 itauq = ie + m
2382 itaup = itauq + m
2383 iwork = itaup + m
2384*
2385* Bidiagonalize L in U
2386* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2387*
2388 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2389 $ work( itauq ), work( itaup ),
2390 $ work( iwork ), lwork-iwork+1, ierr )
2391*
2392* Multiply right vectors bidiagonalizing L by Q in A
2393* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2394*
2395 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2396 $ work( itaup ), a, lda, work( iwork ),
2397 $ lwork-iwork+1, ierr )
2398*
2399* Generate left vectors bidiagonalizing L in U
2400* (Workspace: need 4*M, prefer 3*M + M*NB)
2401*
2402 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2403 $ work( iwork ), lwork-iwork+1, ierr )
2404 iwork = ie + m
2405*
2406* Perform bidiagonal QR iteration, computing left
2407* singular vectors of A in U and computing right
2408* singular vectors of A in A
2409* (Workspace: need BDSPAC)
2410*
2411 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2412 $ u, ldu, dum, 1, work( iwork ), info )
2413*
2414 END IF
2415*
2416 ELSE IF( wntvs ) THEN
2417*
2418 IF( wntun ) THEN
2419*
2420* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2421* M right singular vectors to be computed in VT and
2422* no left singular vectors to be computed
2423*
2424 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2425*
2426* Sufficient workspace for a fast algorithm
2427*
2428 ir = 1
2429 IF( lwork.GE.wrkbl+lda*m ) THEN
2430*
2431* WORK(IR) is LDA by M
2432*
2433 ldwrkr = lda
2434 ELSE
2435*
2436* WORK(IR) is M by M
2437*
2438 ldwrkr = m
2439 END IF
2440 itau = ir + ldwrkr*m
2441 iwork = itau + m
2442*
2443* Compute A=L*Q
2444* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2445*
2446 CALL dgelqf( m, n, a, lda, work( itau ),
2447 $ work( iwork ), lwork-iwork+1, ierr )
2448*
2449* Copy L to WORK(IR), zeroing out above it
2450*
2451 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2452 $ ldwrkr )
2453 CALL dlaset( 'U', m-1, m-1, zero, zero,
2454 $ work( ir+ldwrkr ), ldwrkr )
2455*
2456* Generate Q in A
2457* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2458*
2459 CALL dorglq( m, n, m, a, lda, work( itau ),
2460 $ work( iwork ), lwork-iwork+1, ierr )
2461 ie = itau
2462 itauq = ie + m
2463 itaup = itauq + m
2464 iwork = itaup + m
2465*
2466* Bidiagonalize L in WORK(IR)
2467* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2468*
2469 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2470 $ work( ie ), work( itauq ),
2471 $ work( itaup ), work( iwork ),
2472 $ lwork-iwork+1, ierr )
2473*
2474* Generate right vectors bidiagonalizing L in
2475* WORK(IR)
2476* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2477*
2478 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2479 $ work( itaup ), work( iwork ),
2480 $ lwork-iwork+1, ierr )
2481 iwork = ie + m
2482*
2483* Perform bidiagonal QR iteration, computing right
2484* singular vectors of L in WORK(IR)
2485* (Workspace: need M*M + BDSPAC)
2486*
2487 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2488 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2489 $ work( iwork ), info )
2490*
2491* Multiply right singular vectors of L in WORK(IR) by
2492* Q in A, storing result in VT
2493* (Workspace: need M*M)
2494*
2495 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2496 $ ldwrkr, a, lda, zero, vt, ldvt )
2497*
2498 ELSE
2499*
2500* Insufficient workspace for a fast algorithm
2501*
2502 itau = 1
2503 iwork = itau + m
2504*
2505* Compute A=L*Q
2506* (Workspace: need 2*M, prefer M + M*NB)
2507*
2508 CALL dgelqf( m, n, a, lda, work( itau ),
2509 $ work( iwork ), lwork-iwork+1, ierr )
2510*
2511* Copy result to VT
2512*
2513 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2514*
2515* Generate Q in VT
2516* (Workspace: need 2*M, prefer M + M*NB)
2517*
2518 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2519 $ work( iwork ), lwork-iwork+1, ierr )
2520 ie = itau
2521 itauq = ie + m
2522 itaup = itauq + m
2523 iwork = itaup + m
2524*
2525* Zero out above L in A
2526*
2527 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2528 $ lda )
2529*
2530* Bidiagonalize L in A
2531* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2532*
2533 CALL dgebrd( m, m, a, lda, s, work( ie ),
2534 $ work( itauq ), work( itaup ),
2535 $ work( iwork ), lwork-iwork+1, ierr )
2536*
2537* Multiply right vectors bidiagonalizing L by Q in VT
2538* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2539*
2540 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2541 $ work( itaup ), vt, ldvt,
2542 $ work( iwork ), lwork-iwork+1, ierr )
2543 iwork = ie + m
2544*
2545* Perform bidiagonal QR iteration, computing right
2546* singular vectors of A in VT
2547* (Workspace: need BDSPAC)
2548*
2549 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2550 $ ldvt, dum, 1, dum, 1, work( iwork ),
2551 $ info )
2552*
2553 END IF
2554*
2555 ELSE IF( wntuo ) THEN
2556*
2557* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2558* M right singular vectors to be computed in VT and
2559* M left singular vectors to be overwritten on A
2560*
2561 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2562*
2563* Sufficient workspace for a fast algorithm
2564*
2565 iu = 1
2566 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2567*
2568* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2569*
2570 ldwrku = lda
2571 ir = iu + ldwrku*m
2572 ldwrkr = lda
2573 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
2574*
2575* WORK(IU) is LDA by M and WORK(IR) is M by M
2576*
2577 ldwrku = lda
2578 ir = iu + ldwrku*m
2579 ldwrkr = m
2580 ELSE
2581*
2582* WORK(IU) is M by M and WORK(IR) is M by M
2583*
2584 ldwrku = m
2585 ir = iu + ldwrku*m
2586 ldwrkr = m
2587 END IF
2588 itau = ir + ldwrkr*m
2589 iwork = itau + m
2590*
2591* Compute A=L*Q
2592* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2593*
2594 CALL dgelqf( m, n, a, lda, work( itau ),
2595 $ work( iwork ), lwork-iwork+1, ierr )
2596*
2597* Copy L to WORK(IU), zeroing out below it
2598*
2599 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2600 $ ldwrku )
2601 CALL dlaset( 'U', m-1, m-1, zero, zero,
2602 $ work( iu+ldwrku ), ldwrku )
2603*
2604* Generate Q in A
2605* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2606*
2607 CALL dorglq( m, n, m, a, lda, work( itau ),
2608 $ work( iwork ), lwork-iwork+1, ierr )
2609 ie = itau
2610 itauq = ie + m
2611 itaup = itauq + m
2612 iwork = itaup + m
2613*
2614* Bidiagonalize L in WORK(IU), copying result to
2615* WORK(IR)
2616* (Workspace: need 2*M*M + 4*M,
2617* prefer 2*M*M+3*M+2*M*NB)
2618*
2619 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2620 $ work( ie ), work( itauq ),
2621 $ work( itaup ), work( iwork ),
2622 $ lwork-iwork+1, ierr )
2623 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2624 $ work( ir ), ldwrkr )
2625*
2626* Generate right bidiagonalizing vectors in WORK(IU)
2627* (Workspace: need 2*M*M + 4*M-1,
2628* prefer 2*M*M+3*M+(M-1)*NB)
2629*
2630 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2631 $ work( itaup ), work( iwork ),
2632 $ lwork-iwork+1, ierr )
2633*
2634* Generate left bidiagonalizing vectors in WORK(IR)
2635* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2636*
2637 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2638 $ work( itauq ), work( iwork ),
2639 $ lwork-iwork+1, ierr )
2640 iwork = ie + m
2641*
2642* Perform bidiagonal QR iteration, computing left
2643* singular vectors of L in WORK(IR) and computing
2644* right singular vectors of L in WORK(IU)
2645* (Workspace: need 2*M*M + BDSPAC)
2646*
2647 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2648 $ work( iu ), ldwrku, work( ir ),
2649 $ ldwrkr, dum, 1, work( iwork ), info )
2650*
2651* Multiply right singular vectors of L in WORK(IU) by
2652* Q in A, storing result in VT
2653* (Workspace: need M*M)
2654*
2655 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2656 $ ldwrku, a, lda, zero, vt, ldvt )
2657*
2658* Copy left singular vectors of L to A
2659* (Workspace: need M*M)
2660*
2661 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2662 $ lda )
2663*
2664 ELSE
2665*
2666* Insufficient workspace for a fast algorithm
2667*
2668 itau = 1
2669 iwork = itau + m
2670*
2671* Compute A=L*Q, copying result to VT
2672* (Workspace: need 2*M, prefer M + M*NB)
2673*
2674 CALL dgelqf( m, n, a, lda, work( itau ),
2675 $ work( iwork ), lwork-iwork+1, ierr )
2676 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2677*
2678* Generate Q in VT
2679* (Workspace: need 2*M, prefer M + M*NB)
2680*
2681 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2682 $ work( iwork ), lwork-iwork+1, ierr )
2683 ie = itau
2684 itauq = ie + m
2685 itaup = itauq + m
2686 iwork = itaup + m
2687*
2688* Zero out above L in A
2689*
2690 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2691 $ lda )
2692*
2693* Bidiagonalize L in A
2694* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2695*
2696 CALL dgebrd( m, m, a, lda, s, work( ie ),
2697 $ work( itauq ), work( itaup ),
2698 $ work( iwork ), lwork-iwork+1, ierr )
2699*
2700* Multiply right vectors bidiagonalizing L by Q in VT
2701* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2702*
2703 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2704 $ work( itaup ), vt, ldvt,
2705 $ work( iwork ), lwork-iwork+1, ierr )
2706*
2707* Generate left bidiagonalizing vectors of L in A
2708* (Workspace: need 4*M, prefer 3*M + M*NB)
2709*
2710 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2711 $ work( iwork ), lwork-iwork+1, ierr )
2712 iwork = ie + m
2713*
2714* Perform bidiagonal QR iteration, compute left
2715* singular vectors of A in A and compute right
2716* singular vectors of A in VT
2717* (Workspace: need BDSPAC)
2718*
2719 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2720 $ ldvt, a, lda, dum, 1, work( iwork ),
2721 $ info )
2722*
2723 END IF
2724*
2725 ELSE IF( wntuas ) THEN
2726*
2727* Path 6t(N much larger than M, JOBU='S' or 'A',
2728* JOBVT='S')
2729* M right singular vectors to be computed in VT and
2730* M left singular vectors to be computed in U
2731*
2732 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2733*
2734* Sufficient workspace for a fast algorithm
2735*
2736 iu = 1
2737 IF( lwork.GE.wrkbl+lda*m ) THEN
2738*
2739* WORK(IU) is LDA by N
2740*
2741 ldwrku = lda
2742 ELSE
2743*
2744* WORK(IU) is LDA by M
2745*
2746 ldwrku = m
2747 END IF
2748 itau = iu + ldwrku*m
2749 iwork = itau + m
2750*
2751* Compute A=L*Q
2752* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2753*
2754 CALL dgelqf( m, n, a, lda, work( itau ),
2755 $ work( iwork ), lwork-iwork+1, ierr )
2756*
2757* Copy L to WORK(IU), zeroing out above it
2758*
2759 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2760 $ ldwrku )
2761 CALL dlaset( 'U', m-1, m-1, zero, zero,
2762 $ work( iu+ldwrku ), ldwrku )
2763*
2764* Generate Q in A
2765* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2766*
2767 CALL dorglq( m, n, m, a, lda, work( itau ),
2768 $ work( iwork ), lwork-iwork+1, ierr )
2769 ie = itau
2770 itauq = ie + m
2771 itaup = itauq + m
2772 iwork = itaup + m
2773*
2774* Bidiagonalize L in WORK(IU), copying result to U
2775* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2776*
2777 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2778 $ work( ie ), work( itauq ),
2779 $ work( itaup ), work( iwork ),
2780 $ lwork-iwork+1, ierr )
2781 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2782 $ ldu )
2783*
2784* Generate right bidiagonalizing vectors in WORK(IU)
2785* (Workspace: need M*M + 4*M-1,
2786* prefer M*M+3*M+(M-1)*NB)
2787*
2788 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2789 $ work( itaup ), work( iwork ),
2790 $ lwork-iwork+1, ierr )
2791*
2792* Generate left bidiagonalizing vectors in U
2793* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2794*
2795 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2796 $ work( iwork ), lwork-iwork+1, ierr )
2797 iwork = ie + m
2798*
2799* Perform bidiagonal QR iteration, computing left
2800* singular vectors of L in U and computing right
2801* singular vectors of L in WORK(IU)
2802* (Workspace: need M*M + BDSPAC)
2803*
2804 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2805 $ work( iu ), ldwrku, u, ldu, dum, 1,
2806 $ work( iwork ), info )
2807*
2808* Multiply right singular vectors of L in WORK(IU) by
2809* Q in A, storing result in VT
2810* (Workspace: need M*M)
2811*
2812 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2813 $ ldwrku, a, lda, zero, vt, ldvt )
2814*
2815 ELSE
2816*
2817* Insufficient workspace for a fast algorithm
2818*
2819 itau = 1
2820 iwork = itau + m
2821*
2822* Compute A=L*Q, copying result to VT
2823* (Workspace: need 2*M, prefer M + M*NB)
2824*
2825 CALL dgelqf( m, n, a, lda, work( itau ),
2826 $ work( iwork ), lwork-iwork+1, ierr )
2827 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2828*
2829* Generate Q in VT
2830* (Workspace: need 2*M, prefer M + M*NB)
2831*
2832 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2833 $ work( iwork ), lwork-iwork+1, ierr )
2834*
2835* Copy L to U, zeroing out above it
2836*
2837 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2838 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2839 $ ldu )
2840 ie = itau
2841 itauq = ie + m
2842 itaup = itauq + m
2843 iwork = itaup + m
2844*
2845* Bidiagonalize L in U
2846* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2847*
2848 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2849 $ work( itauq ), work( itaup ),
2850 $ work( iwork ), lwork-iwork+1, ierr )
2851*
2852* Multiply right bidiagonalizing vectors in U by Q
2853* in VT
2854* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2855*
2856 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2857 $ work( itaup ), vt, ldvt,
2858 $ work( iwork ), lwork-iwork+1, ierr )
2859*
2860* Generate left bidiagonalizing vectors in U
2861* (Workspace: need 4*M, prefer 3*M + M*NB)
2862*
2863 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2864 $ work( iwork ), lwork-iwork+1, ierr )
2865 iwork = ie + m
2866*
2867* Perform bidiagonal QR iteration, computing left
2868* singular vectors of A in U and computing right
2869* singular vectors of A in VT
2870* (Workspace: need BDSPAC)
2871*
2872 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2873 $ ldvt, u, ldu, dum, 1, work( iwork ),
2874 $ info )
2875*
2876 END IF
2877*
2878 END IF
2879*
2880 ELSE IF( wntva ) THEN
2881*
2882 IF( wntun ) THEN
2883*
2884* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2885* N right singular vectors to be computed in VT and
2886* no left singular vectors to be computed
2887*
2888 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
2889*
2890* Sufficient workspace for a fast algorithm
2891*
2892 ir = 1
2893 IF( lwork.GE.wrkbl+lda*m ) THEN
2894*
2895* WORK(IR) is LDA by M
2896*
2897 ldwrkr = lda
2898 ELSE
2899*
2900* WORK(IR) is M by M
2901*
2902 ldwrkr = m
2903 END IF
2904 itau = ir + ldwrkr*m
2905 iwork = itau + m
2906*
2907* Compute A=L*Q, copying result to VT
2908* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2909*
2910 CALL dgelqf( m, n, a, lda, work( itau ),
2911 $ work( iwork ), lwork-iwork+1, ierr )
2912 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2913*
2914* Copy L to WORK(IR), zeroing out above it
2915*
2916 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2917 $ ldwrkr )
2918 CALL dlaset( 'U', m-1, m-1, zero, zero,
2919 $ work( ir+ldwrkr ), ldwrkr )
2920*
2921* Generate Q in VT
2922* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2923*
2924 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2925 $ work( iwork ), lwork-iwork+1, ierr )
2926 ie = itau
2927 itauq = ie + m
2928 itaup = itauq + m
2929 iwork = itaup + m
2930*
2931* Bidiagonalize L in WORK(IR)
2932* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2933*
2934 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2935 $ work( ie ), work( itauq ),
2936 $ work( itaup ), work( iwork ),
2937 $ lwork-iwork+1, ierr )
2938*
2939* Generate right bidiagonalizing vectors in WORK(IR)
2940* (Workspace: need M*M + 4*M-1,
2941* prefer M*M+3*M+(M-1)*NB)
2942*
2943 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2944 $ work( itaup ), work( iwork ),
2945 $ lwork-iwork+1, ierr )
2946 iwork = ie + m
2947*
2948* Perform bidiagonal QR iteration, computing right
2949* singular vectors of L in WORK(IR)
2950* (Workspace: need M*M + BDSPAC)
2951*
2952 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2953 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2954 $ work( iwork ), info )
2955*
2956* Multiply right singular vectors of L in WORK(IR) by
2957* Q in VT, storing result in A
2958* (Workspace: need M*M)
2959*
2960 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2961 $ ldwrkr, vt, ldvt, zero, a, lda )
2962*
2963* Copy right singular vectors of A from A to VT
2964*
2965 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
2966*
2967 ELSE
2968*
2969* Insufficient workspace for a fast algorithm
2970*
2971 itau = 1
2972 iwork = itau + m
2973*
2974* Compute A=L*Q, copying result to VT
2975* (Workspace: need 2*M, prefer M + M*NB)
2976*
2977 CALL dgelqf( m, n, a, lda, work( itau ),
2978 $ work( iwork ), lwork-iwork+1, ierr )
2979 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2980*
2981* Generate Q in VT
2982* (Workspace: need M + N, prefer M + N*NB)
2983*
2984 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2985 $ work( iwork ), lwork-iwork+1, ierr )
2986 ie = itau
2987 itauq = ie + m
2988 itaup = itauq + m
2989 iwork = itaup + m
2990*
2991* Zero out above L in A
2992*
2993 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2994 $ lda )
2995*
2996* Bidiagonalize L in A
2997* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2998*
2999 CALL dgebrd( m, m, a, lda, s, work( ie ),
3000 $ work( itauq ), work( itaup ),
3001 $ work( iwork ), lwork-iwork+1, ierr )
3002*
3003* Multiply right bidiagonalizing vectors in A by Q
3004* in VT
3005* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3006*
3007 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3008 $ work( itaup ), vt, ldvt,
3009 $ work( iwork ), lwork-iwork+1, ierr )
3010 iwork = ie + m
3011*
3012* Perform bidiagonal QR iteration, computing right
3013* singular vectors of A in VT
3014* (Workspace: need BDSPAC)
3015*
3016 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3017 $ ldvt, dum, 1, dum, 1, work( iwork ),
3018 $ info )
3019*
3020 END IF
3021*
3022 ELSE IF( wntuo ) THEN
3023*
3024* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3025* N right singular vectors to be computed in VT and
3026* M left singular vectors to be overwritten on A
3027*
3028 IF( lwork.GE.2*m*m+max( n + m, 4*m, bdspac ) ) THEN
3029*
3030* Sufficient workspace for a fast algorithm
3031*
3032 iu = 1
3033 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3034*
3035* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3036*
3037 ldwrku = lda
3038 ir = iu + ldwrku*m
3039 ldwrkr = lda
3040 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
3041*
3042* WORK(IU) is LDA by M and WORK(IR) is M by M
3043*
3044 ldwrku = lda
3045 ir = iu + ldwrku*m
3046 ldwrkr = m
3047 ELSE
3048*
3049* WORK(IU) is M by M and WORK(IR) is M by M
3050*
3051 ldwrku = m
3052 ir = iu + ldwrku*m
3053 ldwrkr = m
3054 END IF
3055 itau = ir + ldwrkr*m
3056 iwork = itau + m
3057*
3058* Compute A=L*Q, copying result to VT
3059* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3060*
3061 CALL dgelqf( m, n, a, lda, work( itau ),
3062 $ work( iwork ), lwork-iwork+1, ierr )
3063 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3064*
3065* Generate Q in VT
3066* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3067*
3068 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3069 $ work( iwork ), lwork-iwork+1, ierr )
3070*
3071* Copy L to WORK(IU), zeroing out above it
3072*
3073 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3074 $ ldwrku )
3075 CALL dlaset( 'U', m-1, m-1, zero, zero,
3076 $ work( iu+ldwrku ), ldwrku )
3077 ie = itau
3078 itauq = ie + m
3079 itaup = itauq + m
3080 iwork = itaup + m
3081*
3082* Bidiagonalize L in WORK(IU), copying result to
3083* WORK(IR)
3084* (Workspace: need 2*M*M + 4*M,
3085* prefer 2*M*M+3*M+2*M*NB)
3086*
3087 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3088 $ work( ie ), work( itauq ),
3089 $ work( itaup ), work( iwork ),
3090 $ lwork-iwork+1, ierr )
3091 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3092 $ work( ir ), ldwrkr )
3093*
3094* Generate right bidiagonalizing vectors in WORK(IU)
3095* (Workspace: need 2*M*M + 4*M-1,
3096* prefer 2*M*M+3*M+(M-1)*NB)
3097*
3098 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3099 $ work( itaup ), work( iwork ),
3100 $ lwork-iwork+1, ierr )
3101*
3102* Generate left bidiagonalizing vectors in WORK(IR)
3103* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3104*
3105 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3106 $ work( itauq ), work( iwork ),
3107 $ lwork-iwork+1, ierr )
3108 iwork = ie + m
3109*
3110* Perform bidiagonal QR iteration, computing left
3111* singular vectors of L in WORK(IR) and computing
3112* right singular vectors of L in WORK(IU)
3113* (Workspace: need 2*M*M + BDSPAC)
3114*
3115 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3116 $ work( iu ), ldwrku, work( ir ),
3117 $ ldwrkr, dum, 1, work( iwork ), info )
3118*
3119* Multiply right singular vectors of L in WORK(IU) by
3120* Q in VT, storing result in A
3121* (Workspace: need M*M)
3122*
3123 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3124 $ ldwrku, vt, ldvt, zero, a, lda )
3125*
3126* Copy right singular vectors of A from A to VT
3127*
3128 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3129*
3130* Copy left singular vectors of A from WORK(IR) to A
3131*
3132 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3133 $ lda )
3134*
3135 ELSE
3136*
3137* Insufficient workspace for a fast algorithm
3138*
3139 itau = 1
3140 iwork = itau + m
3141*
3142* Compute A=L*Q, copying result to VT
3143* (Workspace: need 2*M, prefer M + M*NB)
3144*
3145 CALL dgelqf( m, n, a, lda, work( itau ),
3146 $ work( iwork ), lwork-iwork+1, ierr )
3147 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3148*
3149* Generate Q in VT
3150* (Workspace: need M + N, prefer M + N*NB)
3151*
3152 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3153 $ work( iwork ), lwork-iwork+1, ierr )
3154 ie = itau
3155 itauq = ie + m
3156 itaup = itauq + m
3157 iwork = itaup + m
3158*
3159* Zero out above L in A
3160*
3161 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3162 $ lda )
3163*
3164* Bidiagonalize L in A
3165* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3166*
3167 CALL dgebrd( m, m, a, lda, s, work( ie ),
3168 $ work( itauq ), work( itaup ),
3169 $ work( iwork ), lwork-iwork+1, ierr )
3170*
3171* Multiply right bidiagonalizing vectors in A by Q
3172* in VT
3173* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3174*
3175 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3176 $ work( itaup ), vt, ldvt,
3177 $ work( iwork ), lwork-iwork+1, ierr )
3178*
3179* Generate left bidiagonalizing vectors in A
3180* (Workspace: need 4*M, prefer 3*M + M*NB)
3181*
3182 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3183 $ work( iwork ), lwork-iwork+1, ierr )
3184 iwork = ie + m
3185*
3186* Perform bidiagonal QR iteration, computing left
3187* singular vectors of A in A and computing right
3188* singular vectors of A in VT
3189* (Workspace: need BDSPAC)
3190*
3191 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3192 $ ldvt, a, lda, dum, 1, work( iwork ),
3193 $ info )
3194*
3195 END IF
3196*
3197 ELSE IF( wntuas ) THEN
3198*
3199* Path 9t(N much larger than M, JOBU='S' or 'A',
3200* JOBVT='A')
3201* N right singular vectors to be computed in VT and
3202* M left singular vectors to be computed in U
3203*
3204 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
3205*
3206* Sufficient workspace for a fast algorithm
3207*
3208 iu = 1
3209 IF( lwork.GE.wrkbl+lda*m ) THEN
3210*
3211* WORK(IU) is LDA by M
3212*
3213 ldwrku = lda
3214 ELSE
3215*
3216* WORK(IU) is M by M
3217*
3218 ldwrku = m
3219 END IF
3220 itau = iu + ldwrku*m
3221 iwork = itau + m
3222*
3223* Compute A=L*Q, copying result to VT
3224* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3225*
3226 CALL dgelqf( m, n, a, lda, work( itau ),
3227 $ work( iwork ), lwork-iwork+1, ierr )
3228 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3229*
3230* Generate Q in VT
3231* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3232*
3233 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3234 $ work( iwork ), lwork-iwork+1, ierr )
3235*
3236* Copy L to WORK(IU), zeroing out above it
3237*
3238 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3239 $ ldwrku )
3240 CALL dlaset( 'U', m-1, m-1, zero, zero,
3241 $ work( iu+ldwrku ), ldwrku )
3242 ie = itau
3243 itauq = ie + m
3244 itaup = itauq + m
3245 iwork = itaup + m
3246*
3247* Bidiagonalize L in WORK(IU), copying result to U
3248* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3249*
3250 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3251 $ work( ie ), work( itauq ),
3252 $ work( itaup ), work( iwork ),
3253 $ lwork-iwork+1, ierr )
3254 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3255 $ ldu )
3256*
3257* Generate right bidiagonalizing vectors in WORK(IU)
3258* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3259*
3260 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3261 $ work( itaup ), work( iwork ),
3262 $ lwork-iwork+1, ierr )
3263*
3264* Generate left bidiagonalizing vectors in U
3265* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3266*
3267 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3268 $ work( iwork ), lwork-iwork+1, ierr )
3269 iwork = ie + m
3270*
3271* Perform bidiagonal QR iteration, computing left
3272* singular vectors of L in U and computing right
3273* singular vectors of L in WORK(IU)
3274* (Workspace: need M*M + BDSPAC)
3275*
3276 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3277 $ work( iu ), ldwrku, u, ldu, dum, 1,
3278 $ work( iwork ), info )
3279*
3280* Multiply right singular vectors of L in WORK(IU) by
3281* Q in VT, storing result in A
3282* (Workspace: need M*M)
3283*
3284 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3285 $ ldwrku, vt, ldvt, zero, a, lda )
3286*
3287* Copy right singular vectors of A from A to VT
3288*
3289 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3290*
3291 ELSE
3292*
3293* Insufficient workspace for a fast algorithm
3294*
3295 itau = 1
3296 iwork = itau + m
3297*
3298* Compute A=L*Q, copying result to VT
3299* (Workspace: need 2*M, prefer M + M*NB)
3300*
3301 CALL dgelqf( m, n, a, lda, work( itau ),
3302 $ work( iwork ), lwork-iwork+1, ierr )
3303 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3304*
3305* Generate Q in VT
3306* (Workspace: need M + N, prefer M + N*NB)
3307*
3308 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3309 $ work( iwork ), lwork-iwork+1, ierr )
3310*
3311* Copy L to U, zeroing out above it
3312*
3313 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3314 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3315 $ ldu )
3316 ie = itau
3317 itauq = ie + m
3318 itaup = itauq + m
3319 iwork = itaup + m
3320*
3321* Bidiagonalize L in U
3322* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3323*
3324 CALL dgebrd( m, m, u, ldu, s, work( ie ),
3325 $ work( itauq ), work( itaup ),
3326 $ work( iwork ), lwork-iwork+1, ierr )
3327*
3328* Multiply right bidiagonalizing vectors in U by Q
3329* in VT
3330* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3331*
3332 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3333 $ work( itaup ), vt, ldvt,
3334 $ work( iwork ), lwork-iwork+1, ierr )
3335*
3336* Generate left bidiagonalizing vectors in U
3337* (Workspace: need 4*M, prefer 3*M + M*NB)
3338*
3339 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3340 $ work( iwork ), lwork-iwork+1, ierr )
3341 iwork = ie + m
3342*
3343* Perform bidiagonal QR iteration, computing left
3344* singular vectors of A in U and computing right
3345* singular vectors of A in VT
3346* (Workspace: need BDSPAC)
3347*
3348 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3349 $ ldvt, u, ldu, dum, 1, work( iwork ),
3350 $ info )
3351*
3352 END IF
3353*
3354 END IF
3355*
3356 END IF
3357*
3358 ELSE
3359*
3360* N .LT. MNTHR
3361*
3362* Path 10t(N greater than M, but not much larger)
3363* Reduce to bidiagonal form without LQ decomposition
3364*
3365 ie = 1
3366 itauq = ie + m
3367 itaup = itauq + m
3368 iwork = itaup + m
3369*
3370* Bidiagonalize A
3371* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3372*
3373 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3374 $ work( itaup ), work( iwork ), lwork-iwork+1,
3375 $ ierr )
3376 IF( wntuas ) THEN
3377*
3378* If left singular vectors desired in U, copy result to U
3379* and generate left bidiagonalizing vectors in U
3380* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3381*
3382 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3383 CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3384 $ work( iwork ), lwork-iwork+1, ierr )
3385 END IF
3386 IF( wntvas ) THEN
3387*
3388* If right singular vectors desired in VT, copy result to
3389* VT and generate right bidiagonalizing vectors in VT
3390* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3391*
3392 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3393 IF( wntva )
3394 $ nrvt = n
3395 IF( wntvs )
3396 $ nrvt = m
3397 CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3398 $ work( iwork ), lwork-iwork+1, ierr )
3399 END IF
3400 IF( wntuo ) THEN
3401*
3402* If left singular vectors desired in A, generate left
3403* bidiagonalizing vectors in A
3404* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3405*
3406 CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3407 $ work( iwork ), lwork-iwork+1, ierr )
3408 END IF
3409 IF( wntvo ) THEN
3410*
3411* If right singular vectors desired in A, generate right
3412* bidiagonalizing vectors in A
3413* (Workspace: need 4*M, prefer 3*M + M*NB)
3414*
3415 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3416 $ work( iwork ), lwork-iwork+1, ierr )
3417 END IF
3418 iwork = ie + m
3419 IF( wntuas .OR. wntuo )
3420 $ nru = m
3421 IF( wntun )
3422 $ nru = 0
3423 IF( wntvas .OR. wntvo )
3424 $ ncvt = n
3425 IF( wntvn )
3426 $ ncvt = 0
3427 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3428*
3429* Perform bidiagonal QR iteration, if desired, computing
3430* left singular vectors in U and computing right singular
3431* vectors in VT
3432* (Workspace: need BDSPAC)
3433*
3434 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3435 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3436 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3437*
3438* Perform bidiagonal QR iteration, if desired, computing
3439* left singular vectors in U and computing right singular
3440* vectors in A
3441* (Workspace: need BDSPAC)
3442*
3443 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3444 $ u, ldu, dum, 1, work( iwork ), info )
3445 ELSE
3446*
3447* Perform bidiagonal QR iteration, if desired, computing
3448* left singular vectors in A and computing right singular
3449* vectors in VT
3450* (Workspace: need BDSPAC)
3451*
3452 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3453 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3454 END IF
3455*
3456 END IF
3457*
3458 END IF
3459*
3460* If DBDSQR failed to converge, copy unconverged superdiagonals
3461* to WORK( 2:MINMN )
3462*
3463 IF( info.NE.0 ) THEN
3464 IF( ie.GT.2 ) THEN
3465 DO 50 i = 1, minmn - 1
3466 work( i+1 ) = work( i+ie-1 )
3467 50 CONTINUE
3468 END IF
3469 IF( ie.LT.2 ) THEN
3470 DO 60 i = minmn - 1, 1, -1
3471 work( i+1 ) = work( i+ie-1 )
3472 60 CONTINUE
3473 END IF
3474 END IF
3475*
3476* Undo scaling if necessary
3477*
3478 IF( iscl.EQ.1 ) THEN
3479 IF( anrm.GT.bignum )
3480 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3481 $ ierr )
3482 IF( info.NE.0 .AND. anrm.GT.bignum )
3483 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3484 $ minmn, ierr )
3485 IF( anrm.LT.smlnum )
3486 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3487 $ ierr )
3488 IF( info.NE.0 .AND. anrm.LT.smlnum )
3489 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3490 $ minmn, ierr )
3491 END IF
3492*
3493* Return optimal workspace in WORK(1)
3494*
3495 work( 1 ) = maxwrk
3496*
3497 RETURN
3498*
3499* End of DGESVD
3500*
3501 END
subroutine dlascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: dlascl.f:143
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition: dlaset.f:110
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
subroutine dbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
DBDSQR
Definition: dbdsqr.f:241
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGBR
Definition: dorgbr.f:157
subroutine dgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGEQRF
Definition: dgeqrf.f:146
subroutine dgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
DGELQF
Definition: dgelqf.f:143
subroutine dgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
DGEBRD
Definition: dgebrd.f:205
subroutine dgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
DGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: dgesvd.f:211
subroutine dorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGQR
Definition: dorgqr.f:128
subroutine dorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
DORGLQ
Definition: dorglq.f:127
subroutine dormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
DORMBR
Definition: dormbr.f:195