LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
dgesvd.f
Go to the documentation of this file.
1*> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download DGESVD + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
20* WORK, LWORK, INFO )
21*
22* .. Scalar Arguments ..
23* CHARACTER JOBU, JOBVT
24* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
25* ..
26* .. Array Arguments ..
27* DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
28* $ VT( LDVT, * ), WORK( * )
29* ..
30*
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*>
37*> DGESVD computes the singular value decomposition (SVD) of a real
38*> M-by-N matrix A, optionally computing the left and/or right singular
39*> vectors. The SVD is written
40*>
41*> A = U * SIGMA * transpose(V)
42*>
43*> where SIGMA is an M-by-N matrix which is zero except for its
44*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
45*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
46*> are the singular values of A; they are real and non-negative, and
47*> are returned in descending order. The first min(m,n) columns of
48*> U and V are the left and right singular vectors of A.
49*>
50*> Note that the routine returns V**T, not V.
51*> \endverbatim
52*
53* Arguments:
54* ==========
55*
56*> \param[in] JOBU
57*> \verbatim
58*> JOBU is CHARACTER*1
59*> Specifies options for computing all or part of the matrix U:
60*> = 'A': all M columns of U are returned in array U:
61*> = 'S': the first min(m,n) columns of U (the left singular
62*> vectors) are returned in the array U;
63*> = 'O': the first min(m,n) columns of U (the left singular
64*> vectors) are overwritten on the array A;
65*> = 'N': no columns of U (no left singular vectors) are
66*> computed.
67*> \endverbatim
68*>
69*> \param[in] JOBVT
70*> \verbatim
71*> JOBVT is CHARACTER*1
72*> Specifies options for computing all or part of the matrix
73*> V**T:
74*> = 'A': all N rows of V**T are returned in the array VT;
75*> = 'S': the first min(m,n) rows of V**T (the right singular
76*> vectors) are returned in the array VT;
77*> = 'O': the first min(m,n) rows of V**T (the right singular
78*> vectors) are overwritten on the array A;
79*> = 'N': no rows of V**T (no right singular vectors) are
80*> computed.
81*>
82*> JOBVT and JOBU cannot both be 'O'.
83*> \endverbatim
84*>
85*> \param[in] M
86*> \verbatim
87*> M is INTEGER
88*> The number of rows of the input matrix A. M >= 0.
89*> \endverbatim
90*>
91*> \param[in] N
92*> \verbatim
93*> N is INTEGER
94*> The number of columns of the input matrix A. N >= 0.
95*> \endverbatim
96*>
97*> \param[in,out] A
98*> \verbatim
99*> A is DOUBLE PRECISION array, dimension (LDA,N)
100*> On entry, the M-by-N matrix A.
101*> On exit,
102*> if JOBU = 'O', A is overwritten with the first min(m,n)
103*> columns of U (the left singular vectors,
104*> stored columnwise);
105*> if JOBVT = 'O', A is overwritten with the first min(m,n)
106*> rows of V**T (the right singular vectors,
107*> stored rowwise);
108*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
109*> are destroyed.
110*> \endverbatim
111*>
112*> \param[in] LDA
113*> \verbatim
114*> LDA is INTEGER
115*> The leading dimension of the array A. LDA >= max(1,M).
116*> \endverbatim
117*>
118*> \param[out] S
119*> \verbatim
120*> S is DOUBLE PRECISION array, dimension (min(M,N))
121*> The singular values of A, sorted so that S(i) >= S(i+1).
122*> \endverbatim
123*>
124*> \param[out] U
125*> \verbatim
126*> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
127*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
128*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
129*> if JOBU = 'S', U contains the first min(m,n) columns of U
130*> (the left singular vectors, stored columnwise);
131*> if JOBU = 'N' or 'O', U is not referenced.
132*> \endverbatim
133*>
134*> \param[in] LDU
135*> \verbatim
136*> LDU is INTEGER
137*> The leading dimension of the array U. LDU >= 1; if
138*> JOBU = 'S' or 'A', LDU >= M.
139*> \endverbatim
140*>
141*> \param[out] VT
142*> \verbatim
143*> VT is DOUBLE PRECISION array, dimension (LDVT,N)
144*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
145*> V**T;
146*> if JOBVT = 'S', VT contains the first min(m,n) rows of
147*> V**T (the right singular vectors, stored rowwise);
148*> if JOBVT = 'N' or 'O', VT is not referenced.
149*> \endverbatim
150*>
151*> \param[in] LDVT
152*> \verbatim
153*> LDVT is INTEGER
154*> The leading dimension of the array VT. LDVT >= 1; if
155*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
156*> \endverbatim
157*>
158*> \param[out] WORK
159*> \verbatim
160*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
161*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
162*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
163*> superdiagonal elements of an upper bidiagonal matrix B
164*> whose diagonal is in S (not necessarily sorted). B
165*> satisfies A = U * B * VT, so it has the same singular values
166*> as A, and singular vectors related by U and VT.
167*> \endverbatim
168*>
169*> \param[in] LWORK
170*> \verbatim
171*> LWORK is INTEGER
172*> The dimension of the array WORK.
173*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
174*> - PATH 1 (M much larger than N, JOBU='N')
175*> - PATH 1t (N much larger than M, JOBVT='N')
176*> LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
177*> For good performance, LWORK should generally be larger.
178*>
179*> If LWORK = -1, then a workspace query is assumed; the routine
180*> only calculates the optimal size of the WORK array, returns
181*> this value as the first entry of the WORK array, and no error
182*> message related to LWORK is issued by XERBLA.
183*> \endverbatim
184*>
185*> \param[out] INFO
186*> \verbatim
187*> INFO is INTEGER
188*> = 0: successful exit.
189*> < 0: if INFO = -i, the i-th argument had an illegal value.
190*> > 0: if DBDSQR did not converge, INFO specifies how many
191*> superdiagonals of an intermediate bidiagonal form B
192*> did not converge to zero. See the description of WORK
193*> above for details.
194*> \endverbatim
195*
196* Authors:
197* ========
198*
199*> \author Univ. of Tennessee
200*> \author Univ. of California Berkeley
201*> \author Univ. of Colorado Denver
202*> \author NAG Ltd.
203*
204*> \ingroup gesvd
205*
206* =====================================================================
207 SUBROUTINE dgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
208 $ VT, LDVT, WORK, LWORK, INFO )
209*
210* -- LAPACK driver routine --
211* -- LAPACK is a software package provided by Univ. of Tennessee, --
212* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
213*
214* .. Scalar Arguments ..
215 CHARACTER JOBU, JOBVT
216 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
217* ..
218* .. Array Arguments ..
219 DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
220 $ vt( ldvt, * ), work( * )
221* ..
222*
223* =====================================================================
224*
225* .. Parameters ..
226 DOUBLE PRECISION ZERO, ONE
227 parameter( zero = 0.0d0, one = 1.0d0 )
228* ..
229* .. Local Scalars ..
230 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
231 $ wntva, wntvas, wntvn, wntvo, wntvs
232 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
233 $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
234 $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
235 $ nrvt, wrkbl
236 INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
237 $ lwork_dgebrd, lwork_dorgbr_p, lwork_dorgbr_q,
238 $ lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m
239 DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
240* ..
241* .. Local Arrays ..
242 DOUBLE PRECISION DUM( 1 )
243* ..
244* .. External Subroutines ..
245 EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf,
246 $ dlacpy,
248 $ xerbla
249* ..
250* .. External Functions ..
251 LOGICAL LSAME
252 INTEGER ILAENV
253 DOUBLE PRECISION DLAMCH, DLANGE
254 EXTERNAL lsame, ilaenv, dlamch, dlange
255* ..
256* .. Intrinsic Functions ..
257 INTRINSIC max, min, sqrt
258* ..
259* .. Executable Statements ..
260*
261* Test the input arguments
262*
263 info = 0
264 minmn = min( m, n )
265 wntua = lsame( jobu, 'A' )
266 wntus = lsame( jobu, 'S' )
267 wntuas = wntua .OR. wntus
268 wntuo = lsame( jobu, 'O' )
269 wntun = lsame( jobu, 'N' )
270 wntva = lsame( jobvt, 'A' )
271 wntvs = lsame( jobvt, 'S' )
272 wntvas = wntva .OR. wntvs
273 wntvo = lsame( jobvt, 'O' )
274 wntvn = lsame( jobvt, 'N' )
275 lquery = ( lwork.EQ.-1 )
276*
277 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
278 info = -1
279 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
280 $ ( wntvo .AND. wntuo ) ) THEN
281 info = -2
282 ELSE IF( m.LT.0 ) THEN
283 info = -3
284 ELSE IF( n.LT.0 ) THEN
285 info = -4
286 ELSE IF( lda.LT.max( 1, m ) ) THEN
287 info = -6
288 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
289 info = -9
290 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
291 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
292 info = -11
293 END IF
294*
295* Compute workspace
296* (Note: Comments in the code beginning "Workspace:" describe the
297* minimal amount of workspace needed at that point in the code,
298* as well as the preferred amount for good performance.
299* NB refers to the optimal block size for the immediately
300* following subroutine, as returned by ILAENV.)
301*
302 IF( info.EQ.0 ) THEN
303 minwrk = 1
304 maxwrk = 1
305 IF( m.GE.n .AND. minmn.GT.0 ) THEN
306*
307* Compute space needed for DBDSQR
308*
309 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
310 bdspac = 5*n
311* Compute space needed for DGEQRF
312 CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
313 lwork_dgeqrf = int( dum(1) )
314* Compute space needed for DORGQR
315 CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
316 lwork_dorgqr_n = int( dum(1) )
317 CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
318 lwork_dorgqr_m = int( dum(1) )
319* Compute space needed for DGEBRD
320 CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
321 $ dum(1), dum(1), -1, ierr )
322 lwork_dgebrd = int( dum(1) )
323* Compute space needed for DORGBR P
324 CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
325 $ dum(1), -1, ierr )
326 lwork_dorgbr_p = int( dum(1) )
327* Compute space needed for DORGBR Q
328 CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
329 $ dum(1), -1, ierr )
330 lwork_dorgbr_q = int( dum(1) )
331*
332 IF( m.GE.mnthr ) THEN
333 IF( wntun ) THEN
334*
335* Path 1 (M much larger than N, JOBU='N')
336*
337 maxwrk = n + lwork_dgeqrf
338 maxwrk = max( maxwrk, 3*n + lwork_dgebrd )
339 IF( wntvo .OR. wntvas )
340 $ maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
341 maxwrk = max( maxwrk, bdspac )
342 minwrk = max( 4*n, bdspac )
343 ELSE IF( wntuo .AND. wntvn ) THEN
344*
345* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
346*
347 wrkbl = n + lwork_dgeqrf
348 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
349 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
350 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
351 wrkbl = max( wrkbl, bdspac )
352 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
353 minwrk = max( 3*n + m, bdspac )
354 ELSE IF( wntuo .AND. wntvas ) THEN
355*
356* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
357* 'A')
358*
359 wrkbl = n + lwork_dgeqrf
360 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
361 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
362 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
363 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
364 wrkbl = max( wrkbl, bdspac )
365 maxwrk = max( n*n + wrkbl, n*n + m*n + n )
366 minwrk = max( 3*n + m, bdspac )
367 ELSE IF( wntus .AND. wntvn ) THEN
368*
369* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
370*
371 wrkbl = n + lwork_dgeqrf
372 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
373 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
374 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
375 wrkbl = max( wrkbl, bdspac )
376 maxwrk = n*n + wrkbl
377 minwrk = max( 3*n + m, bdspac )
378 ELSE IF( wntus .AND. wntvo ) THEN
379*
380* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
381*
382 wrkbl = n + lwork_dgeqrf
383 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
384 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
385 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
386 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
387 wrkbl = max( wrkbl, bdspac )
388 maxwrk = 2*n*n + wrkbl
389 minwrk = max( 3*n + m, bdspac )
390 ELSE IF( wntus .AND. wntvas ) THEN
391*
392* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
393* 'A')
394*
395 wrkbl = n + lwork_dgeqrf
396 wrkbl = max( wrkbl, n + lwork_dorgqr_n )
397 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
398 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
399 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
400 wrkbl = max( wrkbl, bdspac )
401 maxwrk = n*n + wrkbl
402 minwrk = max( 3*n + m, bdspac )
403 ELSE IF( wntua .AND. wntvn ) THEN
404*
405* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
406*
407 wrkbl = n + lwork_dgeqrf
408 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
409 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
410 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
411 wrkbl = max( wrkbl, bdspac )
412 maxwrk = n*n + wrkbl
413 minwrk = max( 3*n + m, bdspac )
414 ELSE IF( wntua .AND. wntvo ) THEN
415*
416* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
417*
418 wrkbl = n + lwork_dgeqrf
419 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
420 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
421 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
422 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
423 wrkbl = max( wrkbl, bdspac )
424 maxwrk = 2*n*n + wrkbl
425 minwrk = max( 3*n + m, bdspac )
426 ELSE IF( wntua .AND. wntvas ) THEN
427*
428* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
429* 'A')
430*
431 wrkbl = n + lwork_dgeqrf
432 wrkbl = max( wrkbl, n + lwork_dorgqr_m )
433 wrkbl = max( wrkbl, 3*n + lwork_dgebrd )
434 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_q )
435 wrkbl = max( wrkbl, 3*n + lwork_dorgbr_p )
436 wrkbl = max( wrkbl, bdspac )
437 maxwrk = n*n + wrkbl
438 minwrk = max( 3*n + m, bdspac )
439 END IF
440 ELSE
441*
442* Path 10 (M at least N, but not much larger)
443*
444 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
445 $ dum(1), dum(1), -1, ierr )
446 lwork_dgebrd = int( dum(1) )
447 maxwrk = 3*n + lwork_dgebrd
448 IF( wntus .OR. wntuo ) THEN
449 CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
450 $ dum(1), -1, ierr )
451 lwork_dorgbr_q = int( dum(1) )
452 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
453 END IF
454 IF( wntua ) THEN
455 CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
456 $ dum(1), -1, ierr )
457 lwork_dorgbr_q = int( dum(1) )
458 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_q )
459 END IF
460 IF( .NOT.wntvn ) THEN
461 maxwrk = max( maxwrk, 3*n + lwork_dorgbr_p )
462 END IF
463 maxwrk = max( maxwrk, bdspac )
464 minwrk = max( 3*n + m, bdspac )
465 END IF
466 ELSE IF( minmn.GT.0 ) THEN
467*
468* Compute space needed for DBDSQR
469*
470 mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
471 bdspac = 5*m
472* Compute space needed for DGELQF
473 CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
474 lwork_dgelqf = int( dum(1) )
475* Compute space needed for DORGLQ
476 CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1,
477 $ ierr )
478 lwork_dorglq_n = int( dum(1) )
479 CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_dorglq_m = int( dum(1) )
481* Compute space needed for DGEBRD
482 CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_dgebrd = int( dum(1) )
485* Compute space needed for DORGBR P
486 CALL dorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_dorgbr_p = int( dum(1) )
489* Compute space needed for DORGBR Q
490 CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_dorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_dgelqf
499 maxwrk = max( maxwrk, 3*m + lwork_dgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_dgelqf
509 wrkbl = max( wrkbl, m + lwork_dorglq_m )
510 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
511 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
514 minwrk = max( 3*m + n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_dgelqf
521 wrkbl = max( wrkbl, m + lwork_dorglq_m )
522 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
523 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
524 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m + wrkbl, m*m + m*n + m )
527 minwrk = max( 3*m + n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_dgelqf
533 wrkbl = max( wrkbl, m + lwork_dorglq_m )
534 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
535 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m + n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_dgelqf
544 wrkbl = max( wrkbl, m + lwork_dorglq_m )
545 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
546 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
547 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m + n, bdspac )
551 ELSE IF( wntvs .AND. wntuas ) THEN
552*
553* Path 6t(N much larger than M, JOBU='S' or 'A',
554* JOBVT='S')
555*
556 wrkbl = m + lwork_dgelqf
557 wrkbl = max( wrkbl, m + lwork_dorglq_m )
558 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
559 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
560 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
561 wrkbl = max( wrkbl, bdspac )
562 maxwrk = m*m + wrkbl
563 minwrk = max( 3*m + n, bdspac )
564 ELSE IF( wntva .AND. wntun ) THEN
565*
566* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
567*
568 wrkbl = m + lwork_dgelqf
569 wrkbl = max( wrkbl, m + lwork_dorglq_n )
570 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
571 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
572 wrkbl = max( wrkbl, bdspac )
573 maxwrk = m*m + wrkbl
574 minwrk = max( 3*m + n, bdspac )
575 ELSE IF( wntva .AND. wntuo ) THEN
576*
577* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
578*
579 wrkbl = m + lwork_dgelqf
580 wrkbl = max( wrkbl, m + lwork_dorglq_n )
581 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
582 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
583 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
584 wrkbl = max( wrkbl, bdspac )
585 maxwrk = 2*m*m + wrkbl
586 minwrk = max( 3*m + n, bdspac )
587 ELSE IF( wntva .AND. wntuas ) THEN
588*
589* Path 9t(N much larger than M, JOBU='S' or 'A',
590* JOBVT='A')
591*
592 wrkbl = m + lwork_dgelqf
593 wrkbl = max( wrkbl, m + lwork_dorglq_n )
594 wrkbl = max( wrkbl, 3*m + lwork_dgebrd )
595 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_p )
596 wrkbl = max( wrkbl, 3*m + lwork_dorgbr_q )
597 wrkbl = max( wrkbl, bdspac )
598 maxwrk = m*m + wrkbl
599 minwrk = max( 3*m + n, bdspac )
600 END IF
601 ELSE
602*
603* Path 10t(N greater than M, but not much larger)
604*
605 CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
606 $ dum(1), dum(1), -1, ierr )
607 lwork_dgebrd = int( dum(1) )
608 maxwrk = 3*m + lwork_dgebrd
609 IF( wntvs .OR. wntvo ) THEN
610* Compute space needed for DORGBR P
611 CALL dorgbr( 'P', m, n, m, a, n, dum(1),
612 $ dum(1), -1, ierr )
613 lwork_dorgbr_p = int( dum(1) )
614 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
615 END IF
616 IF( wntva ) THEN
617 CALL dorgbr( 'P', n, n, m, a, n, dum(1),
618 $ dum(1), -1, ierr )
619 lwork_dorgbr_p = int( dum(1) )
620 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_p )
621 END IF
622 IF( .NOT.wntun ) THEN
623 maxwrk = max( maxwrk, 3*m + lwork_dorgbr_q )
624 END IF
625 maxwrk = max( maxwrk, bdspac )
626 minwrk = max( 3*m + n, bdspac )
627 END IF
628 END IF
629 maxwrk = max( maxwrk, minwrk )
630 work( 1 ) = maxwrk
631*
632 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
633 info = -13
634 END IF
635 END IF
636*
637 IF( info.NE.0 ) THEN
638 CALL xerbla( 'DGESVD', -info )
639 RETURN
640 ELSE IF( lquery ) THEN
641 RETURN
642 END IF
643*
644* Quick return if possible
645*
646 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
647 RETURN
648 END IF
649*
650* Get machine constants
651*
652 eps = dlamch( 'P' )
653 smlnum = sqrt( dlamch( 'S' ) ) / eps
654 bignum = one / smlnum
655*
656* Scale A if max element outside range [SMLNUM,BIGNUM]
657*
658 anrm = dlange( 'M', m, n, a, lda, dum )
659 iscl = 0
660 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
661 iscl = 1
662 CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
663 ELSE IF( anrm.GT.bignum ) THEN
664 iscl = 1
665 CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
666 END IF
667*
668 IF( m.GE.n ) THEN
669*
670* A has at least as many rows as columns. If A has sufficiently
671* more rows than columns, first reduce using the QR
672* decomposition (if sufficient workspace available)
673*
674 IF( m.GE.mnthr ) THEN
675*
676 IF( wntun ) THEN
677*
678* Path 1 (M much larger than N, JOBU='N')
679* No left singular vectors to be computed
680*
681 itau = 1
682 iwork = itau + n
683*
684* Compute A=Q*R
685* (Workspace: need 2*N, prefer N + N*NB)
686*
687 CALL dgeqrf( m, n, a, lda, work( itau ),
688 $ work( iwork ),
689 $ lwork-iwork+1, ierr )
690*
691* Zero out below R
692*
693 IF( n .GT. 1 ) THEN
694 CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
695 $ lda )
696 END IF
697 ie = 1
698 itauq = ie + n
699 itaup = itauq + n
700 iwork = itaup + n
701*
702* Bidiagonalize R in A
703* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
704*
705 CALL dgebrd( n, n, a, lda, s, work( ie ),
706 $ work( itauq ),
707 $ work( itaup ), work( iwork ), lwork-iwork+1,
708 $ ierr )
709 ncvt = 0
710 IF( wntvo .OR. wntvas ) THEN
711*
712* If right singular vectors desired, generate P'.
713* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
714*
715 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
716 $ work( iwork ), lwork-iwork+1, ierr )
717 ncvt = n
718 END IF
719 iwork = ie + n
720*
721* Perform bidiagonal QR iteration, computing right
722* singular vectors of A in A if desired
723* (Workspace: need BDSPAC)
724*
725 CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a,
726 $ lda,
727 $ dum, 1, dum, 1, work( iwork ), info )
728*
729* If right singular vectors desired in VT, copy them there
730*
731 IF( wntvas )
732 $ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
733*
734 ELSE IF( wntuo .AND. wntvn ) THEN
735*
736* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
737* N left singular vectors to be overwritten on A and
738* no right singular vectors to be computed
739*
740 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
741*
742* Sufficient workspace for a fast algorithm
743*
744 ir = 1
745 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
746*
747* WORK(IU) is LDA by N, WORK(IR) is LDA by N
748*
749 ldwrku = lda
750 ldwrkr = lda
751 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
752*
753* WORK(IU) is LDA by N, WORK(IR) is N by N
754*
755 ldwrku = lda
756 ldwrkr = n
757 ELSE
758*
759* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
760*
761 ldwrku = ( lwork-n*n-n ) / n
762 ldwrkr = n
763 END IF
764 itau = ir + ldwrkr*n
765 iwork = itau + n
766*
767* Compute A=Q*R
768* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
769*
770 CALL dgeqrf( m, n, a, lda, work( itau ),
771 $ work( iwork ), lwork-iwork+1, ierr )
772*
773* Copy R to WORK(IR) and zero out below it
774*
775 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
776 $ ldwrkr )
777 CALL dlaset( 'L', n-1, n-1, zero, zero,
778 $ work( ir+1 ),
779 $ ldwrkr )
780*
781* Generate Q in A
782* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
783*
784 CALL dorgqr( m, n, n, a, lda, work( itau ),
785 $ work( iwork ), lwork-iwork+1, ierr )
786 ie = itau
787 itauq = ie + n
788 itaup = itauq + n
789 iwork = itaup + n
790*
791* Bidiagonalize R in WORK(IR)
792* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
793*
794 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
795 $ work( ie ),
796 $ work( itauq ), work( itaup ),
797 $ work( iwork ), lwork-iwork+1, ierr )
798*
799* Generate left vectors bidiagonalizing R
800* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
801*
802 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
803 $ work( itauq ), work( iwork ),
804 $ lwork-iwork+1, ierr )
805 iwork = ie + n
806*
807* Perform bidiagonal QR iteration, computing left
808* singular vectors of R in WORK(IR)
809* (Workspace: need N*N + BDSPAC)
810*
811 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
812 $ 1,
813 $ work( ir ), ldwrkr, dum, 1,
814 $ work( iwork ), info )
815 iu = ie + n
816*
817* Multiply Q in A by left singular vectors of R in
818* WORK(IR), storing result in WORK(IU) and copying to A
819* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
820*
821 DO 10 i = 1, m, ldwrku
822 chunk = min( m-i+1, ldwrku )
823 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i,
824 $ 1 ),
825 $ lda, work( ir ), ldwrkr, zero,
826 $ work( iu ), ldwrku )
827 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
828 $ a( i, 1 ), lda )
829 10 CONTINUE
830*
831 ELSE
832*
833* Insufficient workspace for a fast algorithm
834*
835 ie = 1
836 itauq = ie + n
837 itaup = itauq + n
838 iwork = itaup + n
839*
840* Bidiagonalize A
841* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
842*
843 CALL dgebrd( m, n, a, lda, s, work( ie ),
844 $ work( itauq ), work( itaup ),
845 $ work( iwork ), lwork-iwork+1, ierr )
846*
847* Generate left vectors bidiagonalizing A
848* (Workspace: need 4*N, prefer 3*N + N*NB)
849*
850 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
851 $ work( iwork ), lwork-iwork+1, ierr )
852 iwork = ie + n
853*
854* Perform bidiagonal QR iteration, computing left
855* singular vectors of A in A
856* (Workspace: need BDSPAC)
857*
858 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
859 $ 1,
860 $ a, lda, dum, 1, work( iwork ), info )
861*
862 END IF
863*
864 ELSE IF( wntuo .AND. wntvas ) THEN
865*
866* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
867* N left singular vectors to be overwritten on A and
868* N right singular vectors to be computed in VT
869*
870 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
871*
872* Sufficient workspace for a fast algorithm
873*
874 ir = 1
875 IF( lwork.GE.max( wrkbl, lda*n + n ) + lda*n ) THEN
876*
877* WORK(IU) is LDA by N and WORK(IR) is LDA by N
878*
879 ldwrku = lda
880 ldwrkr = lda
881 ELSE IF( lwork.GE.max( wrkbl, lda*n + n ) + n*n ) THEN
882*
883* WORK(IU) is LDA by N and WORK(IR) is N by N
884*
885 ldwrku = lda
886 ldwrkr = n
887 ELSE
888*
889* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
890*
891 ldwrku = ( lwork-n*n-n ) / n
892 ldwrkr = n
893 END IF
894 itau = ir + ldwrkr*n
895 iwork = itau + n
896*
897* Compute A=Q*R
898* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
899*
900 CALL dgeqrf( m, n, a, lda, work( itau ),
901 $ work( iwork ), lwork-iwork+1, ierr )
902*
903* Copy R to VT, zeroing out below it
904*
905 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
906 IF( n.GT.1 )
907 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
908 $ vt( 2, 1 ), ldvt )
909*
910* Generate Q in A
911* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
912*
913 CALL dorgqr( m, n, n, a, lda, work( itau ),
914 $ work( iwork ), lwork-iwork+1, ierr )
915 ie = itau
916 itauq = ie + n
917 itaup = itauq + n
918 iwork = itaup + n
919*
920* Bidiagonalize R in VT, copying result to WORK(IR)
921* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
922*
923 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
924 $ work( itauq ), work( itaup ),
925 $ work( iwork ), lwork-iwork+1, ierr )
926 CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ),
927 $ ldwrkr )
928*
929* Generate left vectors bidiagonalizing R in WORK(IR)
930* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
931*
932 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
933 $ work( itauq ), work( iwork ),
934 $ lwork-iwork+1, ierr )
935*
936* Generate right vectors bidiagonalizing R in VT
937* (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
938*
939 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
940 $ work( iwork ), lwork-iwork+1, ierr )
941 iwork = ie + n
942*
943* Perform bidiagonal QR iteration, computing left
944* singular vectors of R in WORK(IR) and computing right
945* singular vectors of R in VT
946* (Workspace: need N*N + BDSPAC)
947*
948 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
949 $ ldvt,
950 $ work( ir ), ldwrkr, dum, 1,
951 $ work( iwork ), info )
952 iu = ie + n
953*
954* Multiply Q in A by left singular vectors of R in
955* WORK(IR), storing result in WORK(IU) and copying to A
956* (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
957*
958 DO 20 i = 1, m, ldwrku
959 chunk = min( m-i+1, ldwrku )
960 CALL dgemm( 'N', 'N', chunk, n, n, one, a( i,
961 $ 1 ),
962 $ lda, work( ir ), ldwrkr, zero,
963 $ work( iu ), ldwrku )
964 CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
965 $ a( i, 1 ), lda )
966 20 CONTINUE
967*
968 ELSE
969*
970* Insufficient workspace for a fast algorithm
971*
972 itau = 1
973 iwork = itau + n
974*
975* Compute A=Q*R
976* (Workspace: need 2*N, prefer N + N*NB)
977*
978 CALL dgeqrf( m, n, a, lda, work( itau ),
979 $ work( iwork ), lwork-iwork+1, ierr )
980*
981* Copy R to VT, zeroing out below it
982*
983 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
984 IF( n.GT.1 )
985 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
986 $ vt( 2, 1 ), ldvt )
987*
988* Generate Q in A
989* (Workspace: need 2*N, prefer N + N*NB)
990*
991 CALL dorgqr( m, n, n, a, lda, work( itau ),
992 $ work( iwork ), lwork-iwork+1, ierr )
993 ie = itau
994 itauq = ie + n
995 itaup = itauq + n
996 iwork = itaup + n
997*
998* Bidiagonalize R in VT
999* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1000*
1001 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1002 $ work( itauq ), work( itaup ),
1003 $ work( iwork ), lwork-iwork+1, ierr )
1004*
1005* Multiply Q in A by left vectors bidiagonalizing R
1006* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1007*
1008 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1009 $ work( itauq ), a, lda, work( iwork ),
1010 $ lwork-iwork+1, ierr )
1011*
1012* Generate right vectors bidiagonalizing R in VT
1013* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1014*
1015 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1016 $ work( iwork ), lwork-iwork+1, ierr )
1017 iwork = ie + n
1018*
1019* Perform bidiagonal QR iteration, computing left
1020* singular vectors of A in A and computing right
1021* singular vectors of A in VT
1022* (Workspace: need BDSPAC)
1023*
1024 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1025 $ ldvt,
1026 $ a, lda, dum, 1, work( iwork ), info )
1027*
1028 END IF
1029*
1030 ELSE IF( wntus ) THEN
1031*
1032 IF( wntvn ) THEN
1033*
1034* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1035* N left singular vectors to be computed in U and
1036* no right singular vectors to be computed
1037*
1038 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1039*
1040* Sufficient workspace for a fast algorithm
1041*
1042 ir = 1
1043 IF( lwork.GE.wrkbl+lda*n ) THEN
1044*
1045* WORK(IR) is LDA by N
1046*
1047 ldwrkr = lda
1048 ELSE
1049*
1050* WORK(IR) is N by N
1051*
1052 ldwrkr = n
1053 END IF
1054 itau = ir + ldwrkr*n
1055 iwork = itau + n
1056*
1057* Compute A=Q*R
1058* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1059*
1060 CALL dgeqrf( m, n, a, lda, work( itau ),
1061 $ work( iwork ), lwork-iwork+1, ierr )
1062*
1063* Copy R to WORK(IR), zeroing out below it
1064*
1065 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1066 $ ldwrkr )
1067 CALL dlaset( 'L', n-1, n-1, zero, zero,
1068 $ work( ir+1 ), ldwrkr )
1069*
1070* Generate Q in A
1071* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1072*
1073 CALL dorgqr( m, n, n, a, lda, work( itau ),
1074 $ work( iwork ), lwork-iwork+1, ierr )
1075 ie = itau
1076 itauq = ie + n
1077 itaup = itauq + n
1078 iwork = itaup + n
1079*
1080* Bidiagonalize R in WORK(IR)
1081* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1082*
1083 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1084 $ work( ie ), work( itauq ),
1085 $ work( itaup ), work( iwork ),
1086 $ lwork-iwork+1, ierr )
1087*
1088* Generate left vectors bidiagonalizing R in WORK(IR)
1089* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1090*
1091 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1092 $ work( itauq ), work( iwork ),
1093 $ lwork-iwork+1, ierr )
1094 iwork = ie + n
1095*
1096* Perform bidiagonal QR iteration, computing left
1097* singular vectors of R in WORK(IR)
1098* (Workspace: need N*N + BDSPAC)
1099*
1100 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ),
1101 $ dum,
1102 $ 1, work( ir ), ldwrkr, dum, 1,
1103 $ work( iwork ), info )
1104*
1105* Multiply Q in A by left singular vectors of R in
1106* WORK(IR), storing result in U
1107* (Workspace: need N*N)
1108*
1109 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1110 $ work( ir ), ldwrkr, zero, u, ldu )
1111*
1112 ELSE
1113*
1114* Insufficient workspace for a fast algorithm
1115*
1116 itau = 1
1117 iwork = itau + n
1118*
1119* Compute A=Q*R, copying result to U
1120* (Workspace: need 2*N, prefer N + N*NB)
1121*
1122 CALL dgeqrf( m, n, a, lda, work( itau ),
1123 $ work( iwork ), lwork-iwork+1, ierr )
1124 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1125*
1126* Generate Q in U
1127* (Workspace: need 2*N, prefer N + N*NB)
1128*
1129 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1130 $ work( iwork ), lwork-iwork+1, ierr )
1131 ie = itau
1132 itauq = ie + n
1133 itaup = itauq + n
1134 iwork = itaup + n
1135*
1136* Zero out below R in A
1137*
1138 IF( n .GT. 1 ) THEN
1139 CALL dlaset( 'L', n-1, n-1, zero, zero,
1140 $ a( 2, 1 ), lda )
1141 END IF
1142*
1143* Bidiagonalize R in A
1144* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1145*
1146 CALL dgebrd( n, n, a, lda, s, work( ie ),
1147 $ work( itauq ), work( itaup ),
1148 $ work( iwork ), lwork-iwork+1, ierr )
1149*
1150* Multiply Q in U by left vectors bidiagonalizing R
1151* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1152*
1153 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1154 $ work( itauq ), u, ldu, work( iwork ),
1155 $ lwork-iwork+1, ierr )
1156 iwork = ie + n
1157*
1158* Perform bidiagonal QR iteration, computing left
1159* singular vectors of A in U
1160* (Workspace: need BDSPAC)
1161*
1162 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ),
1163 $ dum,
1164 $ 1, u, ldu, dum, 1, work( iwork ),
1165 $ info )
1166*
1167 END IF
1168*
1169 ELSE IF( wntvo ) THEN
1170*
1171* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1172* N left singular vectors to be computed in U and
1173* N right singular vectors to be overwritten on A
1174*
1175 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1176*
1177* Sufficient workspace for a fast algorithm
1178*
1179 iu = 1
1180 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1181*
1182* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1183*
1184 ldwrku = lda
1185 ir = iu + ldwrku*n
1186 ldwrkr = lda
1187 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1188*
1189* WORK(IU) is LDA by N and WORK(IR) is N by N
1190*
1191 ldwrku = lda
1192 ir = iu + ldwrku*n
1193 ldwrkr = n
1194 ELSE
1195*
1196* WORK(IU) is N by N and WORK(IR) is N by N
1197*
1198 ldwrku = n
1199 ir = iu + ldwrku*n
1200 ldwrkr = n
1201 END IF
1202 itau = ir + ldwrkr*n
1203 iwork = itau + n
1204*
1205* Compute A=Q*R
1206* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1207*
1208 CALL dgeqrf( m, n, a, lda, work( itau ),
1209 $ work( iwork ), lwork-iwork+1, ierr )
1210*
1211* Copy R to WORK(IU), zeroing out below it
1212*
1213 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1214 $ ldwrku )
1215 CALL dlaset( 'L', n-1, n-1, zero, zero,
1216 $ work( iu+1 ), ldwrku )
1217*
1218* Generate Q in A
1219* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1220*
1221 CALL dorgqr( m, n, n, a, lda, work( itau ),
1222 $ work( iwork ), lwork-iwork+1, ierr )
1223 ie = itau
1224 itauq = ie + n
1225 itaup = itauq + n
1226 iwork = itaup + n
1227*
1228* Bidiagonalize R in WORK(IU), copying result to
1229* WORK(IR)
1230* (Workspace: need 2*N*N + 4*N,
1231* prefer 2*N*N+3*N+2*N*NB)
1232*
1233 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1234 $ work( ie ), work( itauq ),
1235 $ work( itaup ), work( iwork ),
1236 $ lwork-iwork+1, ierr )
1237 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1238 $ work( ir ), ldwrkr )
1239*
1240* Generate left bidiagonalizing vectors in WORK(IU)
1241* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1242*
1243 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1244 $ work( itauq ), work( iwork ),
1245 $ lwork-iwork+1, ierr )
1246*
1247* Generate right bidiagonalizing vectors in WORK(IR)
1248* (Workspace: need 2*N*N + 4*N-1,
1249* prefer 2*N*N+3*N+(N-1)*NB)
1250*
1251 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1252 $ work( itaup ), work( iwork ),
1253 $ lwork-iwork+1, ierr )
1254 iwork = ie + n
1255*
1256* Perform bidiagonal QR iteration, computing left
1257* singular vectors of R in WORK(IU) and computing
1258* right singular vectors of R in WORK(IR)
1259* (Workspace: need 2*N*N + BDSPAC)
1260*
1261 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1262 $ work( ir ), ldwrkr, work( iu ),
1263 $ ldwrku, dum, 1, work( iwork ), info )
1264*
1265* Multiply Q in A by left singular vectors of R in
1266* WORK(IU), storing result in U
1267* (Workspace: need N*N)
1268*
1269 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1270 $ work( iu ), ldwrku, zero, u, ldu )
1271*
1272* Copy right singular vectors of R to A
1273* (Workspace: need N*N)
1274*
1275 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1276 $ lda )
1277*
1278 ELSE
1279*
1280* Insufficient workspace for a fast algorithm
1281*
1282 itau = 1
1283 iwork = itau + n
1284*
1285* Compute A=Q*R, copying result to U
1286* (Workspace: need 2*N, prefer N + N*NB)
1287*
1288 CALL dgeqrf( m, n, a, lda, work( itau ),
1289 $ work( iwork ), lwork-iwork+1, ierr )
1290 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1291*
1292* Generate Q in U
1293* (Workspace: need 2*N, prefer N + N*NB)
1294*
1295 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1296 $ work( iwork ), lwork-iwork+1, ierr )
1297 ie = itau
1298 itauq = ie + n
1299 itaup = itauq + n
1300 iwork = itaup + n
1301*
1302* Zero out below R in A
1303*
1304 IF( n .GT. 1 ) THEN
1305 CALL dlaset( 'L', n-1, n-1, zero, zero,
1306 $ a( 2, 1 ), lda )
1307 END IF
1308*
1309* Bidiagonalize R in A
1310* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1311*
1312 CALL dgebrd( n, n, a, lda, s, work( ie ),
1313 $ work( itauq ), work( itaup ),
1314 $ work( iwork ), lwork-iwork+1, ierr )
1315*
1316* Multiply Q in U by left vectors bidiagonalizing R
1317* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1318*
1319 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1320 $ work( itauq ), u, ldu, work( iwork ),
1321 $ lwork-iwork+1, ierr )
1322*
1323* Generate right vectors bidiagonalizing R in A
1324* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1325*
1326 CALL dorgbr( 'P', n, n, n, a, lda,
1327 $ work( itaup ),
1328 $ work( iwork ), lwork-iwork+1, ierr )
1329 iwork = ie + n
1330*
1331* Perform bidiagonal QR iteration, computing left
1332* singular vectors of A in U and computing right
1333* singular vectors of A in A
1334* (Workspace: need BDSPAC)
1335*
1336 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1337 $ lda, u, ldu, dum, 1, work( iwork ),
1338 $ info )
1339*
1340 END IF
1341*
1342 ELSE IF( wntvas ) THEN
1343*
1344* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1345* or 'A')
1346* N left singular vectors to be computed in U and
1347* N right singular vectors to be computed in VT
1348*
1349 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1350*
1351* Sufficient workspace for a fast algorithm
1352*
1353 iu = 1
1354 IF( lwork.GE.wrkbl+lda*n ) THEN
1355*
1356* WORK(IU) is LDA by N
1357*
1358 ldwrku = lda
1359 ELSE
1360*
1361* WORK(IU) is N by N
1362*
1363 ldwrku = n
1364 END IF
1365 itau = iu + ldwrku*n
1366 iwork = itau + n
1367*
1368* Compute A=Q*R
1369* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1370*
1371 CALL dgeqrf( m, n, a, lda, work( itau ),
1372 $ work( iwork ), lwork-iwork+1, ierr )
1373*
1374* Copy R to WORK(IU), zeroing out below it
1375*
1376 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1377 $ ldwrku )
1378 CALL dlaset( 'L', n-1, n-1, zero, zero,
1379 $ work( iu+1 ), ldwrku )
1380*
1381* Generate Q in A
1382* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1383*
1384 CALL dorgqr( m, n, n, a, lda, work( itau ),
1385 $ work( iwork ), lwork-iwork+1, ierr )
1386 ie = itau
1387 itauq = ie + n
1388 itaup = itauq + n
1389 iwork = itaup + n
1390*
1391* Bidiagonalize R in WORK(IU), copying result to VT
1392* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1393*
1394 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1395 $ work( ie ), work( itauq ),
1396 $ work( itaup ), work( iwork ),
1397 $ lwork-iwork+1, ierr )
1398 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1399 $ ldvt )
1400*
1401* Generate left bidiagonalizing vectors in WORK(IU)
1402* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1403*
1404 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1405 $ work( itauq ), work( iwork ),
1406 $ lwork-iwork+1, ierr )
1407*
1408* Generate right bidiagonalizing vectors in VT
1409* (Workspace: need N*N + 4*N-1,
1410* prefer N*N+3*N+(N-1)*NB)
1411*
1412 CALL dorgbr( 'P', n, n, n, vt, ldvt,
1413 $ work( itaup ),
1414 $ work( iwork ), lwork-iwork+1, ierr )
1415 iwork = ie + n
1416*
1417* Perform bidiagonal QR iteration, computing left
1418* singular vectors of R in WORK(IU) and computing
1419* right singular vectors of R in VT
1420* (Workspace: need N*N + BDSPAC)
1421*
1422 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1423 $ ldvt, work( iu ), ldwrku, dum, 1,
1424 $ work( iwork ), info )
1425*
1426* Multiply Q in A by left singular vectors of R in
1427* WORK(IU), storing result in U
1428* (Workspace: need N*N)
1429*
1430 CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1431 $ work( iu ), ldwrku, zero, u, ldu )
1432*
1433 ELSE
1434*
1435* Insufficient workspace for a fast algorithm
1436*
1437 itau = 1
1438 iwork = itau + n
1439*
1440* Compute A=Q*R, copying result to U
1441* (Workspace: need 2*N, prefer N + N*NB)
1442*
1443 CALL dgeqrf( m, n, a, lda, work( itau ),
1444 $ work( iwork ), lwork-iwork+1, ierr )
1445 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1446*
1447* Generate Q in U
1448* (Workspace: need 2*N, prefer N + N*NB)
1449*
1450 CALL dorgqr( m, n, n, u, ldu, work( itau ),
1451 $ work( iwork ), lwork-iwork+1, ierr )
1452*
1453* Copy R to VT, zeroing out below it
1454*
1455 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1456 IF( n.GT.1 )
1457 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1458 $ vt( 2, 1 ), ldvt )
1459 ie = itau
1460 itauq = ie + n
1461 itaup = itauq + n
1462 iwork = itaup + n
1463*
1464* Bidiagonalize R in VT
1465* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1466*
1467 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1468 $ work( itauq ), work( itaup ),
1469 $ work( iwork ), lwork-iwork+1, ierr )
1470*
1471* Multiply Q in U by left bidiagonalizing vectors
1472* in VT
1473* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1474*
1475 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1476 $ work( itauq ), u, ldu, work( iwork ),
1477 $ lwork-iwork+1, ierr )
1478*
1479* Generate right bidiagonalizing vectors in VT
1480* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1481*
1482 CALL dorgbr( 'P', n, n, n, vt, ldvt,
1483 $ work( itaup ),
1484 $ work( iwork ), lwork-iwork+1, ierr )
1485 iwork = ie + n
1486*
1487* Perform bidiagonal QR iteration, computing left
1488* singular vectors of A in U and computing right
1489* singular vectors of A in VT
1490* (Workspace: need BDSPAC)
1491*
1492 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1493 $ ldvt, u, ldu, dum, 1, work( iwork ),
1494 $ info )
1495*
1496 END IF
1497*
1498 END IF
1499*
1500 ELSE IF( wntua ) THEN
1501*
1502 IF( wntvn ) THEN
1503*
1504* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1505* M left singular vectors to be computed in U and
1506* no right singular vectors to be computed
1507*
1508 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1509*
1510* Sufficient workspace for a fast algorithm
1511*
1512 ir = 1
1513 IF( lwork.GE.wrkbl+lda*n ) THEN
1514*
1515* WORK(IR) is LDA by N
1516*
1517 ldwrkr = lda
1518 ELSE
1519*
1520* WORK(IR) is N by N
1521*
1522 ldwrkr = n
1523 END IF
1524 itau = ir + ldwrkr*n
1525 iwork = itau + n
1526*
1527* Compute A=Q*R, copying result to U
1528* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1529*
1530 CALL dgeqrf( m, n, a, lda, work( itau ),
1531 $ work( iwork ), lwork-iwork+1, ierr )
1532 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1533*
1534* Copy R to WORK(IR), zeroing out below it
1535*
1536 CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1537 $ ldwrkr )
1538 CALL dlaset( 'L', n-1, n-1, zero, zero,
1539 $ work( ir+1 ), ldwrkr )
1540*
1541* Generate Q in U
1542* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1543*
1544 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1545 $ work( iwork ), lwork-iwork+1, ierr )
1546 ie = itau
1547 itauq = ie + n
1548 itaup = itauq + n
1549 iwork = itaup + n
1550*
1551* Bidiagonalize R in WORK(IR)
1552* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1553*
1554 CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1555 $ work( ie ), work( itauq ),
1556 $ work( itaup ), work( iwork ),
1557 $ lwork-iwork+1, ierr )
1558*
1559* Generate left bidiagonalizing vectors in WORK(IR)
1560* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1561*
1562 CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1563 $ work( itauq ), work( iwork ),
1564 $ lwork-iwork+1, ierr )
1565 iwork = ie + n
1566*
1567* Perform bidiagonal QR iteration, computing left
1568* singular vectors of R in WORK(IR)
1569* (Workspace: need N*N + BDSPAC)
1570*
1571 CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ),
1572 $ dum,
1573 $ 1, work( ir ), ldwrkr, dum, 1,
1574 $ work( iwork ), info )
1575*
1576* Multiply Q in U by left singular vectors of R in
1577* WORK(IR), storing result in A
1578* (Workspace: need N*N)
1579*
1580 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1581 $ work( ir ), ldwrkr, zero, a, lda )
1582*
1583* Copy left singular vectors of A from A to U
1584*
1585 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1586*
1587 ELSE
1588*
1589* Insufficient workspace for a fast algorithm
1590*
1591 itau = 1
1592 iwork = itau + n
1593*
1594* Compute A=Q*R, copying result to U
1595* (Workspace: need 2*N, prefer N + N*NB)
1596*
1597 CALL dgeqrf( m, n, a, lda, work( itau ),
1598 $ work( iwork ), lwork-iwork+1, ierr )
1599 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1600*
1601* Generate Q in U
1602* (Workspace: need N + M, prefer N + M*NB)
1603*
1604 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1605 $ work( iwork ), lwork-iwork+1, ierr )
1606 ie = itau
1607 itauq = ie + n
1608 itaup = itauq + n
1609 iwork = itaup + n
1610*
1611* Zero out below R in A
1612*
1613 IF( n .GT. 1 ) THEN
1614 CALL dlaset( 'L', n-1, n-1, zero, zero,
1615 $ a( 2, 1 ), lda )
1616 END IF
1617*
1618* Bidiagonalize R in A
1619* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1620*
1621 CALL dgebrd( n, n, a, lda, s, work( ie ),
1622 $ work( itauq ), work( itaup ),
1623 $ work( iwork ), lwork-iwork+1, ierr )
1624*
1625* Multiply Q in U by left bidiagonalizing vectors
1626* in A
1627* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1628*
1629 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1630 $ work( itauq ), u, ldu, work( iwork ),
1631 $ lwork-iwork+1, ierr )
1632 iwork = ie + n
1633*
1634* Perform bidiagonal QR iteration, computing left
1635* singular vectors of A in U
1636* (Workspace: need BDSPAC)
1637*
1638 CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ),
1639 $ dum,
1640 $ 1, u, ldu, dum, 1, work( iwork ),
1641 $ info )
1642*
1643 END IF
1644*
1645 ELSE IF( wntvo ) THEN
1646*
1647* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1648* M left singular vectors to be computed in U and
1649* N right singular vectors to be overwritten on A
1650*
1651 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1652*
1653* Sufficient workspace for a fast algorithm
1654*
1655 iu = 1
1656 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1657*
1658* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1659*
1660 ldwrku = lda
1661 ir = iu + ldwrku*n
1662 ldwrkr = lda
1663 ELSE IF( lwork.GE.wrkbl+( lda + n )*n ) THEN
1664*
1665* WORK(IU) is LDA by N and WORK(IR) is N by N
1666*
1667 ldwrku = lda
1668 ir = iu + ldwrku*n
1669 ldwrkr = n
1670 ELSE
1671*
1672* WORK(IU) is N by N and WORK(IR) is N by N
1673*
1674 ldwrku = n
1675 ir = iu + ldwrku*n
1676 ldwrkr = n
1677 END IF
1678 itau = ir + ldwrkr*n
1679 iwork = itau + n
1680*
1681* Compute A=Q*R, copying result to U
1682* (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
1683*
1684 CALL dgeqrf( m, n, a, lda, work( itau ),
1685 $ work( iwork ), lwork-iwork+1, ierr )
1686 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1687*
1688* Generate Q in U
1689* (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
1690*
1691 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1692 $ work( iwork ), lwork-iwork+1, ierr )
1693*
1694* Copy R to WORK(IU), zeroing out below it
1695*
1696 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1697 $ ldwrku )
1698 CALL dlaset( 'L', n-1, n-1, zero, zero,
1699 $ work( iu+1 ), ldwrku )
1700 ie = itau
1701 itauq = ie + n
1702 itaup = itauq + n
1703 iwork = itaup + n
1704*
1705* Bidiagonalize R in WORK(IU), copying result to
1706* WORK(IR)
1707* (Workspace: need 2*N*N + 4*N,
1708* prefer 2*N*N+3*N+2*N*NB)
1709*
1710 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1711 $ work( ie ), work( itauq ),
1712 $ work( itaup ), work( iwork ),
1713 $ lwork-iwork+1, ierr )
1714 CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1715 $ work( ir ), ldwrkr )
1716*
1717* Generate left bidiagonalizing vectors in WORK(IU)
1718* (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
1719*
1720 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1721 $ work( itauq ), work( iwork ),
1722 $ lwork-iwork+1, ierr )
1723*
1724* Generate right bidiagonalizing vectors in WORK(IR)
1725* (Workspace: need 2*N*N + 4*N-1,
1726* prefer 2*N*N+3*N+(N-1)*NB)
1727*
1728 CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1729 $ work( itaup ), work( iwork ),
1730 $ lwork-iwork+1, ierr )
1731 iwork = ie + n
1732*
1733* Perform bidiagonal QR iteration, computing left
1734* singular vectors of R in WORK(IU) and computing
1735* right singular vectors of R in WORK(IR)
1736* (Workspace: need 2*N*N + BDSPAC)
1737*
1738 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1739 $ work( ir ), ldwrkr, work( iu ),
1740 $ ldwrku, dum, 1, work( iwork ), info )
1741*
1742* Multiply Q in U by left singular vectors of R in
1743* WORK(IU), storing result in A
1744* (Workspace: need N*N)
1745*
1746 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1747 $ work( iu ), ldwrku, zero, a, lda )
1748*
1749* Copy left singular vectors of A from A to U
1750*
1751 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1752*
1753* Copy right singular vectors of R from WORK(IR) to A
1754*
1755 CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1756 $ lda )
1757*
1758 ELSE
1759*
1760* Insufficient workspace for a fast algorithm
1761*
1762 itau = 1
1763 iwork = itau + n
1764*
1765* Compute A=Q*R, copying result to U
1766* (Workspace: need 2*N, prefer N + N*NB)
1767*
1768 CALL dgeqrf( m, n, a, lda, work( itau ),
1769 $ work( iwork ), lwork-iwork+1, ierr )
1770 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1771*
1772* Generate Q in U
1773* (Workspace: need N + M, prefer N + M*NB)
1774*
1775 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1776 $ work( iwork ), lwork-iwork+1, ierr )
1777 ie = itau
1778 itauq = ie + n
1779 itaup = itauq + n
1780 iwork = itaup + n
1781*
1782* Zero out below R in A
1783*
1784 IF( n .GT. 1 ) THEN
1785 CALL dlaset( 'L', n-1, n-1, zero, zero,
1786 $ a( 2, 1 ), lda )
1787 END IF
1788*
1789* Bidiagonalize R in A
1790* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1791*
1792 CALL dgebrd( n, n, a, lda, s, work( ie ),
1793 $ work( itauq ), work( itaup ),
1794 $ work( iwork ), lwork-iwork+1, ierr )
1795*
1796* Multiply Q in U by left bidiagonalizing vectors
1797* in A
1798* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1799*
1800 CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1801 $ work( itauq ), u, ldu, work( iwork ),
1802 $ lwork-iwork+1, ierr )
1803*
1804* Generate right bidiagonalizing vectors in A
1805* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1806*
1807 CALL dorgbr( 'P', n, n, n, a, lda,
1808 $ work( itaup ),
1809 $ work( iwork ), lwork-iwork+1, ierr )
1810 iwork = ie + n
1811*
1812* Perform bidiagonal QR iteration, computing left
1813* singular vectors of A in U and computing right
1814* singular vectors of A in A
1815* (Workspace: need BDSPAC)
1816*
1817 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1818 $ lda, u, ldu, dum, 1, work( iwork ),
1819 $ info )
1820*
1821 END IF
1822*
1823 ELSE IF( wntvas ) THEN
1824*
1825* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1826* or 'A')
1827* M left singular vectors to be computed in U and
1828* N right singular vectors to be computed in VT
1829*
1830 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1831*
1832* Sufficient workspace for a fast algorithm
1833*
1834 iu = 1
1835 IF( lwork.GE.wrkbl+lda*n ) THEN
1836*
1837* WORK(IU) is LDA by N
1838*
1839 ldwrku = lda
1840 ELSE
1841*
1842* WORK(IU) is N by N
1843*
1844 ldwrku = n
1845 END IF
1846 itau = iu + ldwrku*n
1847 iwork = itau + n
1848*
1849* Compute A=Q*R, copying result to U
1850* (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
1851*
1852 CALL dgeqrf( m, n, a, lda, work( itau ),
1853 $ work( iwork ), lwork-iwork+1, ierr )
1854 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1855*
1856* Generate Q in U
1857* (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
1858*
1859 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1860 $ work( iwork ), lwork-iwork+1, ierr )
1861*
1862* Copy R to WORK(IU), zeroing out below it
1863*
1864 CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1865 $ ldwrku )
1866 CALL dlaset( 'L', n-1, n-1, zero, zero,
1867 $ work( iu+1 ), ldwrku )
1868 ie = itau
1869 itauq = ie + n
1870 itaup = itauq + n
1871 iwork = itaup + n
1872*
1873* Bidiagonalize R in WORK(IU), copying result to VT
1874* (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
1875*
1876 CALL dgebrd( n, n, work( iu ), ldwrku, s,
1877 $ work( ie ), work( itauq ),
1878 $ work( itaup ), work( iwork ),
1879 $ lwork-iwork+1, ierr )
1880 CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1881 $ ldvt )
1882*
1883* Generate left bidiagonalizing vectors in WORK(IU)
1884* (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
1885*
1886 CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1887 $ work( itauq ), work( iwork ),
1888 $ lwork-iwork+1, ierr )
1889*
1890* Generate right bidiagonalizing vectors in VT
1891* (Workspace: need N*N + 4*N-1,
1892* prefer N*N+3*N+(N-1)*NB)
1893*
1894 CALL dorgbr( 'P', n, n, n, vt, ldvt,
1895 $ work( itaup ),
1896 $ work( iwork ), lwork-iwork+1, ierr )
1897 iwork = ie + n
1898*
1899* Perform bidiagonal QR iteration, computing left
1900* singular vectors of R in WORK(IU) and computing
1901* right singular vectors of R in VT
1902* (Workspace: need N*N + BDSPAC)
1903*
1904 CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1905 $ ldvt, work( iu ), ldwrku, dum, 1,
1906 $ work( iwork ), info )
1907*
1908* Multiply Q in U by left singular vectors of R in
1909* WORK(IU), storing result in A
1910* (Workspace: need N*N)
1911*
1912 CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1913 $ work( iu ), ldwrku, zero, a, lda )
1914*
1915* Copy left singular vectors of A from A to U
1916*
1917 CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1918*
1919 ELSE
1920*
1921* Insufficient workspace for a fast algorithm
1922*
1923 itau = 1
1924 iwork = itau + n
1925*
1926* Compute A=Q*R, copying result to U
1927* (Workspace: need 2*N, prefer N + N*NB)
1928*
1929 CALL dgeqrf( m, n, a, lda, work( itau ),
1930 $ work( iwork ), lwork-iwork+1, ierr )
1931 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1932*
1933* Generate Q in U
1934* (Workspace: need N + M, prefer N + M*NB)
1935*
1936 CALL dorgqr( m, m, n, u, ldu, work( itau ),
1937 $ work( iwork ), lwork-iwork+1, ierr )
1938*
1939* Copy R from A to VT, zeroing out below it
1940*
1941 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1942 IF( n.GT.1 )
1943 $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1944 $ vt( 2, 1 ), ldvt )
1945 ie = itau
1946 itauq = ie + n
1947 itaup = itauq + n
1948 iwork = itaup + n
1949*
1950* Bidiagonalize R in VT
1951* (Workspace: need 4*N, prefer 3*N + 2*N*NB)
1952*
1953 CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1954 $ work( itauq ), work( itaup ),
1955 $ work( iwork ), lwork-iwork+1, ierr )
1956*
1957* Multiply Q in U by left bidiagonalizing vectors
1958* in VT
1959* (Workspace: need 3*N + M, prefer 3*N + M*NB)
1960*
1961 CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1962 $ work( itauq ), u, ldu, work( iwork ),
1963 $ lwork-iwork+1, ierr )
1964*
1965* Generate right bidiagonalizing vectors in VT
1966* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
1967*
1968 CALL dorgbr( 'P', n, n, n, vt, ldvt,
1969 $ work( itaup ),
1970 $ work( iwork ), lwork-iwork+1, ierr )
1971 iwork = ie + n
1972*
1973* Perform bidiagonal QR iteration, computing left
1974* singular vectors of A in U and computing right
1975* singular vectors of A in VT
1976* (Workspace: need BDSPAC)
1977*
1978 CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1979 $ ldvt, u, ldu, dum, 1, work( iwork ),
1980 $ info )
1981*
1982 END IF
1983*
1984 END IF
1985*
1986 END IF
1987*
1988 ELSE
1989*
1990* M .LT. MNTHR
1991*
1992* Path 10 (M at least N, but not much larger)
1993* Reduce to bidiagonal form without QR decomposition
1994*
1995 ie = 1
1996 itauq = ie + n
1997 itaup = itauq + n
1998 iwork = itaup + n
1999*
2000* Bidiagonalize A
2001* (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
2002*
2003 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
2004 $ work( itaup ), work( iwork ), lwork-iwork+1,
2005 $ ierr )
2006 IF( wntuas ) THEN
2007*
2008* If left singular vectors desired in U, copy result to U
2009* and generate left bidiagonalizing vectors in U
2010* (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
2011*
2012 CALL dlacpy( 'L', m, n, a, lda, u, ldu )
2013 IF( wntus )
2014 $ ncu = n
2015 IF( wntua )
2016 $ ncu = m
2017 CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2018 $ work( iwork ), lwork-iwork+1, ierr )
2019 END IF
2020 IF( wntvas ) THEN
2021*
2022* If right singular vectors desired in VT, copy result to
2023* VT and generate right bidiagonalizing vectors in VT
2024* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2025*
2026 CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
2027 CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2028 $ work( iwork ), lwork-iwork+1, ierr )
2029 END IF
2030 IF( wntuo ) THEN
2031*
2032* If left singular vectors desired in A, generate left
2033* bidiagonalizing vectors in A
2034* (Workspace: need 4*N, prefer 3*N + N*NB)
2035*
2036 CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2037 $ work( iwork ), lwork-iwork+1, ierr )
2038 END IF
2039 IF( wntvo ) THEN
2040*
2041* If right singular vectors desired in A, generate right
2042* bidiagonalizing vectors in A
2043* (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
2044*
2045 CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2046 $ work( iwork ), lwork-iwork+1, ierr )
2047 END IF
2048 iwork = ie + n
2049 IF( wntuas .OR. wntuo )
2050 $ nru = m
2051 IF( wntun )
2052 $ nru = 0
2053 IF( wntvas .OR. wntvo )
2054 $ ncvt = n
2055 IF( wntvn )
2056 $ ncvt = 0
2057 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2058*
2059* Perform bidiagonal QR iteration, if desired, computing
2060* left singular vectors in U and computing right singular
2061* vectors in VT
2062* (Workspace: need BDSPAC)
2063*
2064 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2065 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2066 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2067*
2068* Perform bidiagonal QR iteration, if desired, computing
2069* left singular vectors in U and computing right singular
2070* vectors in A
2071* (Workspace: need BDSPAC)
2072*
2073 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a,
2074 $ lda,
2075 $ u, ldu, dum, 1, work( iwork ), info )
2076 ELSE
2077*
2078* Perform bidiagonal QR iteration, if desired, computing
2079* left singular vectors in A and computing right singular
2080* vectors in VT
2081* (Workspace: need BDSPAC)
2082*
2083 CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2084 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2085 END IF
2086*
2087 END IF
2088*
2089 ELSE
2090*
2091* A has more columns than rows. If A has sufficiently more
2092* columns than rows, first reduce using the LQ decomposition (if
2093* sufficient workspace available)
2094*
2095 IF( n.GE.mnthr ) THEN
2096*
2097 IF( wntvn ) THEN
2098*
2099* Path 1t(N much larger than M, JOBVT='N')
2100* No right singular vectors to be computed
2101*
2102 itau = 1
2103 iwork = itau + m
2104*
2105* Compute A=L*Q
2106* (Workspace: need 2*M, prefer M + M*NB)
2107*
2108 CALL dgelqf( m, n, a, lda, work( itau ),
2109 $ work( iwork ),
2110 $ lwork-iwork+1, ierr )
2111*
2112* Zero out above L
2113*
2114 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2115 $ lda )
2116 ie = 1
2117 itauq = ie + m
2118 itaup = itauq + m
2119 iwork = itaup + m
2120*
2121* Bidiagonalize L in A
2122* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2123*
2124 CALL dgebrd( m, m, a, lda, s, work( ie ),
2125 $ work( itauq ),
2126 $ work( itaup ), work( iwork ), lwork-iwork+1,
2127 $ ierr )
2128 IF( wntuo .OR. wntuas ) THEN
2129*
2130* If left singular vectors desired, generate Q
2131* (Workspace: need 4*M, prefer 3*M + M*NB)
2132*
2133 CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2134 $ work( iwork ), lwork-iwork+1, ierr )
2135 END IF
2136 iwork = ie + m
2137 nru = 0
2138 IF( wntuo .OR. wntuas )
2139 $ nru = m
2140*
2141* Perform bidiagonal QR iteration, computing left singular
2142* vectors of A in A if desired
2143* (Workspace: need BDSPAC)
2144*
2145 CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1,
2146 $ a,
2147 $ lda, dum, 1, work( iwork ), info )
2148*
2149* If left singular vectors desired in U, copy them there
2150*
2151 IF( wntuas )
2152 $ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2153*
2154 ELSE IF( wntvo .AND. wntun ) THEN
2155*
2156* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2157* M right singular vectors to be overwritten on A and
2158* no left singular vectors to be computed
2159*
2160 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2161*
2162* Sufficient workspace for a fast algorithm
2163*
2164 ir = 1
2165 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2166*
2167* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2168*
2169 ldwrku = lda
2170 chunk = n
2171 ldwrkr = lda
2172 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2173*
2174* WORK(IU) is LDA by N and WORK(IR) is M by M
2175*
2176 ldwrku = lda
2177 chunk = n
2178 ldwrkr = m
2179 ELSE
2180*
2181* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2182*
2183 ldwrku = m
2184 chunk = ( lwork-m*m-m ) / m
2185 ldwrkr = m
2186 END IF
2187 itau = ir + ldwrkr*m
2188 iwork = itau + m
2189*
2190* Compute A=L*Q
2191* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2192*
2193 CALL dgelqf( m, n, a, lda, work( itau ),
2194 $ work( iwork ), lwork-iwork+1, ierr )
2195*
2196* Copy L to WORK(IR) and zero out above it
2197*
2198 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2199 $ ldwrkr )
2200 CALL dlaset( 'U', m-1, m-1, zero, zero,
2201 $ work( ir+ldwrkr ), ldwrkr )
2202*
2203* Generate Q in A
2204* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2205*
2206 CALL dorglq( m, n, m, a, lda, work( itau ),
2207 $ work( iwork ), lwork-iwork+1, ierr )
2208 ie = itau
2209 itauq = ie + m
2210 itaup = itauq + m
2211 iwork = itaup + m
2212*
2213* Bidiagonalize L in WORK(IR)
2214* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2215*
2216 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2217 $ work( ie ),
2218 $ work( itauq ), work( itaup ),
2219 $ work( iwork ), lwork-iwork+1, ierr )
2220*
2221* Generate right vectors bidiagonalizing L
2222* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2223*
2224 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2225 $ work( itaup ), work( iwork ),
2226 $ lwork-iwork+1, ierr )
2227 iwork = ie + m
2228*
2229* Perform bidiagonal QR iteration, computing right
2230* singular vectors of L in WORK(IR)
2231* (Workspace: need M*M + BDSPAC)
2232*
2233 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2234 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2235 $ work( iwork ), info )
2236 iu = ie + m
2237*
2238* Multiply right singular vectors of L in WORK(IR) by Q
2239* in A, storing result in WORK(IU) and copying to A
2240* (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
2241*
2242 DO 30 i = 1, n, chunk
2243 blk = min( n-i+1, chunk )
2244 CALL dgemm( 'N', 'N', m, blk, m, one,
2245 $ work( ir ),
2246 $ ldwrkr, a( 1, i ), lda, zero,
2247 $ work( iu ), ldwrku )
2248 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2249 $ a( 1, i ), lda )
2250 30 CONTINUE
2251*
2252 ELSE
2253*
2254* Insufficient workspace for a fast algorithm
2255*
2256 ie = 1
2257 itauq = ie + m
2258 itaup = itauq + m
2259 iwork = itaup + m
2260*
2261* Bidiagonalize A
2262* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
2263*
2264 CALL dgebrd( m, n, a, lda, s, work( ie ),
2265 $ work( itauq ), work( itaup ),
2266 $ work( iwork ), lwork-iwork+1, ierr )
2267*
2268* Generate right vectors bidiagonalizing A
2269* (Workspace: need 4*M, prefer 3*M + M*NB)
2270*
2271 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2272 $ work( iwork ), lwork-iwork+1, ierr )
2273 iwork = ie + m
2274*
2275* Perform bidiagonal QR iteration, computing right
2276* singular vectors of A in A
2277* (Workspace: need BDSPAC)
2278*
2279 CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a,
2280 $ lda,
2281 $ dum, 1, dum, 1, work( iwork ), info )
2282*
2283 END IF
2284*
2285 ELSE IF( wntvo .AND. wntuas ) THEN
2286*
2287* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2288* M right singular vectors to be overwritten on A and
2289* M left singular vectors to be computed in U
2290*
2291 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2292*
2293* Sufficient workspace for a fast algorithm
2294*
2295 ir = 1
2296 IF( lwork.GE.max( wrkbl, lda*n + m ) + lda*m ) THEN
2297*
2298* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2299*
2300 ldwrku = lda
2301 chunk = n
2302 ldwrkr = lda
2303 ELSE IF( lwork.GE.max( wrkbl, lda*n + m ) + m*m ) THEN
2304*
2305* WORK(IU) is LDA by N and WORK(IR) is M by M
2306*
2307 ldwrku = lda
2308 chunk = n
2309 ldwrkr = m
2310 ELSE
2311*
2312* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2313*
2314 ldwrku = m
2315 chunk = ( lwork-m*m-m ) / m
2316 ldwrkr = m
2317 END IF
2318 itau = ir + ldwrkr*m
2319 iwork = itau + m
2320*
2321* Compute A=L*Q
2322* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2323*
2324 CALL dgelqf( m, n, a, lda, work( itau ),
2325 $ work( iwork ), lwork-iwork+1, ierr )
2326*
2327* Copy L to U, zeroing about above it
2328*
2329 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2330 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2331 $ ldu )
2332*
2333* Generate Q in A
2334* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2335*
2336 CALL dorglq( m, n, m, a, lda, work( itau ),
2337 $ work( iwork ), lwork-iwork+1, ierr )
2338 ie = itau
2339 itauq = ie + m
2340 itaup = itauq + m
2341 iwork = itaup + m
2342*
2343* Bidiagonalize L in U, copying result to WORK(IR)
2344* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2345*
2346 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2347 $ work( itauq ), work( itaup ),
2348 $ work( iwork ), lwork-iwork+1, ierr )
2349 CALL dlacpy( 'U', m, m, u, ldu, work( ir ),
2350 $ ldwrkr )
2351*
2352* Generate right vectors bidiagonalizing L in WORK(IR)
2353* (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
2354*
2355 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2356 $ work( itaup ), work( iwork ),
2357 $ lwork-iwork+1, ierr )
2358*
2359* Generate left vectors bidiagonalizing L in U
2360* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2361*
2362 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2363 $ work( iwork ), lwork-iwork+1, ierr )
2364 iwork = ie + m
2365*
2366* Perform bidiagonal QR iteration, computing left
2367* singular vectors of L in U, and computing right
2368* singular vectors of L in WORK(IR)
2369* (Workspace: need M*M + BDSPAC)
2370*
2371 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2372 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2373 $ work( iwork ), info )
2374 iu = ie + m
2375*
2376* Multiply right singular vectors of L in WORK(IR) by Q
2377* in A, storing result in WORK(IU) and copying to A
2378* (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
2379*
2380 DO 40 i = 1, n, chunk
2381 blk = min( n-i+1, chunk )
2382 CALL dgemm( 'N', 'N', m, blk, m, one,
2383 $ work( ir ),
2384 $ ldwrkr, a( 1, i ), lda, zero,
2385 $ work( iu ), ldwrku )
2386 CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2387 $ a( 1, i ), lda )
2388 40 CONTINUE
2389*
2390 ELSE
2391*
2392* Insufficient workspace for a fast algorithm
2393*
2394 itau = 1
2395 iwork = itau + m
2396*
2397* Compute A=L*Q
2398* (Workspace: need 2*M, prefer M + M*NB)
2399*
2400 CALL dgelqf( m, n, a, lda, work( itau ),
2401 $ work( iwork ), lwork-iwork+1, ierr )
2402*
2403* Copy L to U, zeroing out above it
2404*
2405 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2406 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2407 $ ldu )
2408*
2409* Generate Q in A
2410* (Workspace: need 2*M, prefer M + M*NB)
2411*
2412 CALL dorglq( m, n, m, a, lda, work( itau ),
2413 $ work( iwork ), lwork-iwork+1, ierr )
2414 ie = itau
2415 itauq = ie + m
2416 itaup = itauq + m
2417 iwork = itaup + m
2418*
2419* Bidiagonalize L in U
2420* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2421*
2422 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2423 $ work( itauq ), work( itaup ),
2424 $ work( iwork ), lwork-iwork+1, ierr )
2425*
2426* Multiply right vectors bidiagonalizing L by Q in A
2427* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2428*
2429 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2430 $ work( itaup ), a, lda, work( iwork ),
2431 $ lwork-iwork+1, ierr )
2432*
2433* Generate left vectors bidiagonalizing L in U
2434* (Workspace: need 4*M, prefer 3*M + M*NB)
2435*
2436 CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2437 $ work( iwork ), lwork-iwork+1, ierr )
2438 iwork = ie + m
2439*
2440* Perform bidiagonal QR iteration, computing left
2441* singular vectors of A in U and computing right
2442* singular vectors of A in A
2443* (Workspace: need BDSPAC)
2444*
2445 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a,
2446 $ lda,
2447 $ u, ldu, dum, 1, work( iwork ), info )
2448*
2449 END IF
2450*
2451 ELSE IF( wntvs ) THEN
2452*
2453 IF( wntun ) THEN
2454*
2455* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2456* M right singular vectors to be computed in VT and
2457* no left singular vectors to be computed
2458*
2459 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2460*
2461* Sufficient workspace for a fast algorithm
2462*
2463 ir = 1
2464 IF( lwork.GE.wrkbl+lda*m ) THEN
2465*
2466* WORK(IR) is LDA by M
2467*
2468 ldwrkr = lda
2469 ELSE
2470*
2471* WORK(IR) is M by M
2472*
2473 ldwrkr = m
2474 END IF
2475 itau = ir + ldwrkr*m
2476 iwork = itau + m
2477*
2478* Compute A=L*Q
2479* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2480*
2481 CALL dgelqf( m, n, a, lda, work( itau ),
2482 $ work( iwork ), lwork-iwork+1, ierr )
2483*
2484* Copy L to WORK(IR), zeroing out above it
2485*
2486 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2487 $ ldwrkr )
2488 CALL dlaset( 'U', m-1, m-1, zero, zero,
2489 $ work( ir+ldwrkr ), ldwrkr )
2490*
2491* Generate Q in A
2492* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2493*
2494 CALL dorglq( m, n, m, a, lda, work( itau ),
2495 $ work( iwork ), lwork-iwork+1, ierr )
2496 ie = itau
2497 itauq = ie + m
2498 itaup = itauq + m
2499 iwork = itaup + m
2500*
2501* Bidiagonalize L in WORK(IR)
2502* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2503*
2504 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2505 $ work( ie ), work( itauq ),
2506 $ work( itaup ), work( iwork ),
2507 $ lwork-iwork+1, ierr )
2508*
2509* Generate right vectors bidiagonalizing L in
2510* WORK(IR)
2511* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
2512*
2513 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2514 $ work( itaup ), work( iwork ),
2515 $ lwork-iwork+1, ierr )
2516 iwork = ie + m
2517*
2518* Perform bidiagonal QR iteration, computing right
2519* singular vectors of L in WORK(IR)
2520* (Workspace: need M*M + BDSPAC)
2521*
2522 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2523 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2524 $ work( iwork ), info )
2525*
2526* Multiply right singular vectors of L in WORK(IR) by
2527* Q in A, storing result in VT
2528* (Workspace: need M*M)
2529*
2530 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2531 $ ldwrkr, a, lda, zero, vt, ldvt )
2532*
2533 ELSE
2534*
2535* Insufficient workspace for a fast algorithm
2536*
2537 itau = 1
2538 iwork = itau + m
2539*
2540* Compute A=L*Q
2541* (Workspace: need 2*M, prefer M + M*NB)
2542*
2543 CALL dgelqf( m, n, a, lda, work( itau ),
2544 $ work( iwork ), lwork-iwork+1, ierr )
2545*
2546* Copy result to VT
2547*
2548 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2549*
2550* Generate Q in VT
2551* (Workspace: need 2*M, prefer M + M*NB)
2552*
2553 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2554 $ work( iwork ), lwork-iwork+1, ierr )
2555 ie = itau
2556 itauq = ie + m
2557 itaup = itauq + m
2558 iwork = itaup + m
2559*
2560* Zero out above L in A
2561*
2562 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1,
2563 $ 2 ),
2564 $ lda )
2565*
2566* Bidiagonalize L in A
2567* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2568*
2569 CALL dgebrd( m, m, a, lda, s, work( ie ),
2570 $ work( itauq ), work( itaup ),
2571 $ work( iwork ), lwork-iwork+1, ierr )
2572*
2573* Multiply right vectors bidiagonalizing L by Q in VT
2574* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2575*
2576 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2577 $ work( itaup ), vt, ldvt,
2578 $ work( iwork ), lwork-iwork+1, ierr )
2579 iwork = ie + m
2580*
2581* Perform bidiagonal QR iteration, computing right
2582* singular vectors of A in VT
2583* (Workspace: need BDSPAC)
2584*
2585 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2586 $ ldvt, dum, 1, dum, 1, work( iwork ),
2587 $ info )
2588*
2589 END IF
2590*
2591 ELSE IF( wntuo ) THEN
2592*
2593* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2594* M right singular vectors to be computed in VT and
2595* M left singular vectors to be overwritten on A
2596*
2597 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2598*
2599* Sufficient workspace for a fast algorithm
2600*
2601 iu = 1
2602 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2603*
2604* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2605*
2606 ldwrku = lda
2607 ir = iu + ldwrku*m
2608 ldwrkr = lda
2609 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
2610*
2611* WORK(IU) is LDA by M and WORK(IR) is M by M
2612*
2613 ldwrku = lda
2614 ir = iu + ldwrku*m
2615 ldwrkr = m
2616 ELSE
2617*
2618* WORK(IU) is M by M and WORK(IR) is M by M
2619*
2620 ldwrku = m
2621 ir = iu + ldwrku*m
2622 ldwrkr = m
2623 END IF
2624 itau = ir + ldwrkr*m
2625 iwork = itau + m
2626*
2627* Compute A=L*Q
2628* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2629*
2630 CALL dgelqf( m, n, a, lda, work( itau ),
2631 $ work( iwork ), lwork-iwork+1, ierr )
2632*
2633* Copy L to WORK(IU), zeroing out below it
2634*
2635 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2636 $ ldwrku )
2637 CALL dlaset( 'U', m-1, m-1, zero, zero,
2638 $ work( iu+ldwrku ), ldwrku )
2639*
2640* Generate Q in A
2641* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
2642*
2643 CALL dorglq( m, n, m, a, lda, work( itau ),
2644 $ work( iwork ), lwork-iwork+1, ierr )
2645 ie = itau
2646 itauq = ie + m
2647 itaup = itauq + m
2648 iwork = itaup + m
2649*
2650* Bidiagonalize L in WORK(IU), copying result to
2651* WORK(IR)
2652* (Workspace: need 2*M*M + 4*M,
2653* prefer 2*M*M+3*M+2*M*NB)
2654*
2655 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2656 $ work( ie ), work( itauq ),
2657 $ work( itaup ), work( iwork ),
2658 $ lwork-iwork+1, ierr )
2659 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2660 $ work( ir ), ldwrkr )
2661*
2662* Generate right bidiagonalizing vectors in WORK(IU)
2663* (Workspace: need 2*M*M + 4*M-1,
2664* prefer 2*M*M+3*M+(M-1)*NB)
2665*
2666 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2667 $ work( itaup ), work( iwork ),
2668 $ lwork-iwork+1, ierr )
2669*
2670* Generate left bidiagonalizing vectors in WORK(IR)
2671* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
2672*
2673 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2674 $ work( itauq ), work( iwork ),
2675 $ lwork-iwork+1, ierr )
2676 iwork = ie + m
2677*
2678* Perform bidiagonal QR iteration, computing left
2679* singular vectors of L in WORK(IR) and computing
2680* right singular vectors of L in WORK(IU)
2681* (Workspace: need 2*M*M + BDSPAC)
2682*
2683 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2684 $ work( iu ), ldwrku, work( ir ),
2685 $ ldwrkr, dum, 1, work( iwork ), info )
2686*
2687* Multiply right singular vectors of L in WORK(IU) by
2688* Q in A, storing result in VT
2689* (Workspace: need M*M)
2690*
2691 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2692 $ ldwrku, a, lda, zero, vt, ldvt )
2693*
2694* Copy left singular vectors of L to A
2695* (Workspace: need M*M)
2696*
2697 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2698 $ lda )
2699*
2700 ELSE
2701*
2702* Insufficient workspace for a fast algorithm
2703*
2704 itau = 1
2705 iwork = itau + m
2706*
2707* Compute A=L*Q, copying result to VT
2708* (Workspace: need 2*M, prefer M + M*NB)
2709*
2710 CALL dgelqf( m, n, a, lda, work( itau ),
2711 $ work( iwork ), lwork-iwork+1, ierr )
2712 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2713*
2714* Generate Q in VT
2715* (Workspace: need 2*M, prefer M + M*NB)
2716*
2717 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2718 $ work( iwork ), lwork-iwork+1, ierr )
2719 ie = itau
2720 itauq = ie + m
2721 itaup = itauq + m
2722 iwork = itaup + m
2723*
2724* Zero out above L in A
2725*
2726 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1,
2727 $ 2 ),
2728 $ lda )
2729*
2730* Bidiagonalize L in A
2731* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2732*
2733 CALL dgebrd( m, m, a, lda, s, work( ie ),
2734 $ work( itauq ), work( itaup ),
2735 $ work( iwork ), lwork-iwork+1, ierr )
2736*
2737* Multiply right vectors bidiagonalizing L by Q in VT
2738* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2739*
2740 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2741 $ work( itaup ), vt, ldvt,
2742 $ work( iwork ), lwork-iwork+1, ierr )
2743*
2744* Generate left bidiagonalizing vectors of L in A
2745* (Workspace: need 4*M, prefer 3*M + M*NB)
2746*
2747 CALL dorgbr( 'Q', m, m, m, a, lda,
2748 $ work( itauq ),
2749 $ work( iwork ), lwork-iwork+1, ierr )
2750 iwork = ie + m
2751*
2752* Perform bidiagonal QR iteration, compute left
2753* singular vectors of A in A and compute right
2754* singular vectors of A in VT
2755* (Workspace: need BDSPAC)
2756*
2757 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2758 $ ldvt, a, lda, dum, 1, work( iwork ),
2759 $ info )
2760*
2761 END IF
2762*
2763 ELSE IF( wntuas ) THEN
2764*
2765* Path 6t(N much larger than M, JOBU='S' or 'A',
2766* JOBVT='S')
2767* M right singular vectors to be computed in VT and
2768* M left singular vectors to be computed in U
2769*
2770 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2771*
2772* Sufficient workspace for a fast algorithm
2773*
2774 iu = 1
2775 IF( lwork.GE.wrkbl+lda*m ) THEN
2776*
2777* WORK(IU) is LDA by N
2778*
2779 ldwrku = lda
2780 ELSE
2781*
2782* WORK(IU) is LDA by M
2783*
2784 ldwrku = m
2785 END IF
2786 itau = iu + ldwrku*m
2787 iwork = itau + m
2788*
2789* Compute A=L*Q
2790* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2791*
2792 CALL dgelqf( m, n, a, lda, work( itau ),
2793 $ work( iwork ), lwork-iwork+1, ierr )
2794*
2795* Copy L to WORK(IU), zeroing out above it
2796*
2797 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2798 $ ldwrku )
2799 CALL dlaset( 'U', m-1, m-1, zero, zero,
2800 $ work( iu+ldwrku ), ldwrku )
2801*
2802* Generate Q in A
2803* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2804*
2805 CALL dorglq( m, n, m, a, lda, work( itau ),
2806 $ work( iwork ), lwork-iwork+1, ierr )
2807 ie = itau
2808 itauq = ie + m
2809 itaup = itauq + m
2810 iwork = itaup + m
2811*
2812* Bidiagonalize L in WORK(IU), copying result to U
2813* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2814*
2815 CALL dgebrd( m, m, work( iu ), ldwrku, s,
2816 $ work( ie ), work( itauq ),
2817 $ work( itaup ), work( iwork ),
2818 $ lwork-iwork+1, ierr )
2819 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2820 $ ldu )
2821*
2822* Generate right bidiagonalizing vectors in WORK(IU)
2823* (Workspace: need M*M + 4*M-1,
2824* prefer M*M+3*M+(M-1)*NB)
2825*
2826 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2827 $ work( itaup ), work( iwork ),
2828 $ lwork-iwork+1, ierr )
2829*
2830* Generate left bidiagonalizing vectors in U
2831* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
2832*
2833 CALL dorgbr( 'Q', m, m, m, u, ldu,
2834 $ work( itauq ),
2835 $ work( iwork ), lwork-iwork+1, ierr )
2836 iwork = ie + m
2837*
2838* Perform bidiagonal QR iteration, computing left
2839* singular vectors of L in U and computing right
2840* singular vectors of L in WORK(IU)
2841* (Workspace: need M*M + BDSPAC)
2842*
2843 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2844 $ work( iu ), ldwrku, u, ldu, dum, 1,
2845 $ work( iwork ), info )
2846*
2847* Multiply right singular vectors of L in WORK(IU) by
2848* Q in A, storing result in VT
2849* (Workspace: need M*M)
2850*
2851 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2852 $ ldwrku, a, lda, zero, vt, ldvt )
2853*
2854 ELSE
2855*
2856* Insufficient workspace for a fast algorithm
2857*
2858 itau = 1
2859 iwork = itau + m
2860*
2861* Compute A=L*Q, copying result to VT
2862* (Workspace: need 2*M, prefer M + M*NB)
2863*
2864 CALL dgelqf( m, n, a, lda, work( itau ),
2865 $ work( iwork ), lwork-iwork+1, ierr )
2866 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2867*
2868* Generate Q in VT
2869* (Workspace: need 2*M, prefer M + M*NB)
2870*
2871 CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2872 $ work( iwork ), lwork-iwork+1, ierr )
2873*
2874* Copy L to U, zeroing out above it
2875*
2876 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2877 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1,
2878 $ 2 ),
2879 $ ldu )
2880 ie = itau
2881 itauq = ie + m
2882 itaup = itauq + m
2883 iwork = itaup + m
2884*
2885* Bidiagonalize L in U
2886* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
2887*
2888 CALL dgebrd( m, m, u, ldu, s, work( ie ),
2889 $ work( itauq ), work( itaup ),
2890 $ work( iwork ), lwork-iwork+1, ierr )
2891*
2892* Multiply right bidiagonalizing vectors in U by Q
2893* in VT
2894* (Workspace: need 3*M + N, prefer 3*M + N*NB)
2895*
2896 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2897 $ work( itaup ), vt, ldvt,
2898 $ work( iwork ), lwork-iwork+1, ierr )
2899*
2900* Generate left bidiagonalizing vectors in U
2901* (Workspace: need 4*M, prefer 3*M + M*NB)
2902*
2903 CALL dorgbr( 'Q', m, m, m, u, ldu,
2904 $ work( itauq ),
2905 $ work( iwork ), lwork-iwork+1, ierr )
2906 iwork = ie + m
2907*
2908* Perform bidiagonal QR iteration, computing left
2909* singular vectors of A in U and computing right
2910* singular vectors of A in VT
2911* (Workspace: need BDSPAC)
2912*
2913 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2914 $ ldvt, u, ldu, dum, 1, work( iwork ),
2915 $ info )
2916*
2917 END IF
2918*
2919 END IF
2920*
2921 ELSE IF( wntva ) THEN
2922*
2923 IF( wntun ) THEN
2924*
2925* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2926* N right singular vectors to be computed in VT and
2927* no left singular vectors to be computed
2928*
2929 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
2930*
2931* Sufficient workspace for a fast algorithm
2932*
2933 ir = 1
2934 IF( lwork.GE.wrkbl+lda*m ) THEN
2935*
2936* WORK(IR) is LDA by M
2937*
2938 ldwrkr = lda
2939 ELSE
2940*
2941* WORK(IR) is M by M
2942*
2943 ldwrkr = m
2944 END IF
2945 itau = ir + ldwrkr*m
2946 iwork = itau + m
2947*
2948* Compute A=L*Q, copying result to VT
2949* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
2950*
2951 CALL dgelqf( m, n, a, lda, work( itau ),
2952 $ work( iwork ), lwork-iwork+1, ierr )
2953 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2954*
2955* Copy L to WORK(IR), zeroing out above it
2956*
2957 CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2958 $ ldwrkr )
2959 CALL dlaset( 'U', m-1, m-1, zero, zero,
2960 $ work( ir+ldwrkr ), ldwrkr )
2961*
2962* Generate Q in VT
2963* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
2964*
2965 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2966 $ work( iwork ), lwork-iwork+1, ierr )
2967 ie = itau
2968 itauq = ie + m
2969 itaup = itauq + m
2970 iwork = itaup + m
2971*
2972* Bidiagonalize L in WORK(IR)
2973* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
2974*
2975 CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2976 $ work( ie ), work( itauq ),
2977 $ work( itaup ), work( iwork ),
2978 $ lwork-iwork+1, ierr )
2979*
2980* Generate right bidiagonalizing vectors in WORK(IR)
2981* (Workspace: need M*M + 4*M-1,
2982* prefer M*M+3*M+(M-1)*NB)
2983*
2984 CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2985 $ work( itaup ), work( iwork ),
2986 $ lwork-iwork+1, ierr )
2987 iwork = ie + m
2988*
2989* Perform bidiagonal QR iteration, computing right
2990* singular vectors of L in WORK(IR)
2991* (Workspace: need M*M + BDSPAC)
2992*
2993 CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2994 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2995 $ work( iwork ), info )
2996*
2997* Multiply right singular vectors of L in WORK(IR) by
2998* Q in VT, storing result in A
2999* (Workspace: need M*M)
3000*
3001 CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
3002 $ ldwrkr, vt, ldvt, zero, a, lda )
3003*
3004* Copy right singular vectors of A from A to VT
3005*
3006 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3007*
3008 ELSE
3009*
3010* Insufficient workspace for a fast algorithm
3011*
3012 itau = 1
3013 iwork = itau + m
3014*
3015* Compute A=L*Q, copying result to VT
3016* (Workspace: need 2*M, prefer M + M*NB)
3017*
3018 CALL dgelqf( m, n, a, lda, work( itau ),
3019 $ work( iwork ), lwork-iwork+1, ierr )
3020 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3021*
3022* Generate Q in VT
3023* (Workspace: need M + N, prefer M + N*NB)
3024*
3025 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3026 $ work( iwork ), lwork-iwork+1, ierr )
3027 ie = itau
3028 itauq = ie + m
3029 itaup = itauq + m
3030 iwork = itaup + m
3031*
3032* Zero out above L in A
3033*
3034 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1,
3035 $ 2 ),
3036 $ lda )
3037*
3038* Bidiagonalize L in A
3039* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3040*
3041 CALL dgebrd( m, m, a, lda, s, work( ie ),
3042 $ work( itauq ), work( itaup ),
3043 $ work( iwork ), lwork-iwork+1, ierr )
3044*
3045* Multiply right bidiagonalizing vectors in A by Q
3046* in VT
3047* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3048*
3049 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3050 $ work( itaup ), vt, ldvt,
3051 $ work( iwork ), lwork-iwork+1, ierr )
3052 iwork = ie + m
3053*
3054* Perform bidiagonal QR iteration, computing right
3055* singular vectors of A in VT
3056* (Workspace: need BDSPAC)
3057*
3058 CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3059 $ ldvt, dum, 1, dum, 1, work( iwork ),
3060 $ info )
3061*
3062 END IF
3063*
3064 ELSE IF( wntuo ) THEN
3065*
3066* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3067* N right singular vectors to be computed in VT and
3068* M left singular vectors to be overwritten on A
3069*
3070 IF( lwork.GE.2*m*m+max( n + m, 4*m, bdspac ) ) THEN
3071*
3072* Sufficient workspace for a fast algorithm
3073*
3074 iu = 1
3075 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3076*
3077* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3078*
3079 ldwrku = lda
3080 ir = iu + ldwrku*m
3081 ldwrkr = lda
3082 ELSE IF( lwork.GE.wrkbl+( lda + m )*m ) THEN
3083*
3084* WORK(IU) is LDA by M and WORK(IR) is M by M
3085*
3086 ldwrku = lda
3087 ir = iu + ldwrku*m
3088 ldwrkr = m
3089 ELSE
3090*
3091* WORK(IU) is M by M and WORK(IR) is M by M
3092*
3093 ldwrku = m
3094 ir = iu + ldwrku*m
3095 ldwrkr = m
3096 END IF
3097 itau = ir + ldwrkr*m
3098 iwork = itau + m
3099*
3100* Compute A=L*Q, copying result to VT
3101* (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
3102*
3103 CALL dgelqf( m, n, a, lda, work( itau ),
3104 $ work( iwork ), lwork-iwork+1, ierr )
3105 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3106*
3107* Generate Q in VT
3108* (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
3109*
3110 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3111 $ work( iwork ), lwork-iwork+1, ierr )
3112*
3113* Copy L to WORK(IU), zeroing out above it
3114*
3115 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3116 $ ldwrku )
3117 CALL dlaset( 'U', m-1, m-1, zero, zero,
3118 $ work( iu+ldwrku ), ldwrku )
3119 ie = itau
3120 itauq = ie + m
3121 itaup = itauq + m
3122 iwork = itaup + m
3123*
3124* Bidiagonalize L in WORK(IU), copying result to
3125* WORK(IR)
3126* (Workspace: need 2*M*M + 4*M,
3127* prefer 2*M*M+3*M+2*M*NB)
3128*
3129 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3130 $ work( ie ), work( itauq ),
3131 $ work( itaup ), work( iwork ),
3132 $ lwork-iwork+1, ierr )
3133 CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3134 $ work( ir ), ldwrkr )
3135*
3136* Generate right bidiagonalizing vectors in WORK(IU)
3137* (Workspace: need 2*M*M + 4*M-1,
3138* prefer 2*M*M+3*M+(M-1)*NB)
3139*
3140 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3141 $ work( itaup ), work( iwork ),
3142 $ lwork-iwork+1, ierr )
3143*
3144* Generate left bidiagonalizing vectors in WORK(IR)
3145* (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
3146*
3147 CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3148 $ work( itauq ), work( iwork ),
3149 $ lwork-iwork+1, ierr )
3150 iwork = ie + m
3151*
3152* Perform bidiagonal QR iteration, computing left
3153* singular vectors of L in WORK(IR) and computing
3154* right singular vectors of L in WORK(IU)
3155* (Workspace: need 2*M*M + BDSPAC)
3156*
3157 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3158 $ work( iu ), ldwrku, work( ir ),
3159 $ ldwrkr, dum, 1, work( iwork ), info )
3160*
3161* Multiply right singular vectors of L in WORK(IU) by
3162* Q in VT, storing result in A
3163* (Workspace: need M*M)
3164*
3165 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3166 $ ldwrku, vt, ldvt, zero, a, lda )
3167*
3168* Copy right singular vectors of A from A to VT
3169*
3170 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3171*
3172* Copy left singular vectors of A from WORK(IR) to A
3173*
3174 CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3175 $ lda )
3176*
3177 ELSE
3178*
3179* Insufficient workspace for a fast algorithm
3180*
3181 itau = 1
3182 iwork = itau + m
3183*
3184* Compute A=L*Q, copying result to VT
3185* (Workspace: need 2*M, prefer M + M*NB)
3186*
3187 CALL dgelqf( m, n, a, lda, work( itau ),
3188 $ work( iwork ), lwork-iwork+1, ierr )
3189 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3190*
3191* Generate Q in VT
3192* (Workspace: need M + N, prefer M + N*NB)
3193*
3194 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3195 $ work( iwork ), lwork-iwork+1, ierr )
3196 ie = itau
3197 itauq = ie + m
3198 itaup = itauq + m
3199 iwork = itaup + m
3200*
3201* Zero out above L in A
3202*
3203 CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1,
3204 $ 2 ),
3205 $ lda )
3206*
3207* Bidiagonalize L in A
3208* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3209*
3210 CALL dgebrd( m, m, a, lda, s, work( ie ),
3211 $ work( itauq ), work( itaup ),
3212 $ work( iwork ), lwork-iwork+1, ierr )
3213*
3214* Multiply right bidiagonalizing vectors in A by Q
3215* in VT
3216* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3217*
3218 CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3219 $ work( itaup ), vt, ldvt,
3220 $ work( iwork ), lwork-iwork+1, ierr )
3221*
3222* Generate left bidiagonalizing vectors in A
3223* (Workspace: need 4*M, prefer 3*M + M*NB)
3224*
3225 CALL dorgbr( 'Q', m, m, m, a, lda,
3226 $ work( itauq ),
3227 $ work( iwork ), lwork-iwork+1, ierr )
3228 iwork = ie + m
3229*
3230* Perform bidiagonal QR iteration, computing left
3231* singular vectors of A in A and computing right
3232* singular vectors of A in VT
3233* (Workspace: need BDSPAC)
3234*
3235 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3236 $ ldvt, a, lda, dum, 1, work( iwork ),
3237 $ info )
3238*
3239 END IF
3240*
3241 ELSE IF( wntuas ) THEN
3242*
3243* Path 9t(N much larger than M, JOBU='S' or 'A',
3244* JOBVT='A')
3245* N right singular vectors to be computed in VT and
3246* M left singular vectors to be computed in U
3247*
3248 IF( lwork.GE.m*m+max( n + m, 4*m, bdspac ) ) THEN
3249*
3250* Sufficient workspace for a fast algorithm
3251*
3252 iu = 1
3253 IF( lwork.GE.wrkbl+lda*m ) THEN
3254*
3255* WORK(IU) is LDA by M
3256*
3257 ldwrku = lda
3258 ELSE
3259*
3260* WORK(IU) is M by M
3261*
3262 ldwrku = m
3263 END IF
3264 itau = iu + ldwrku*m
3265 iwork = itau + m
3266*
3267* Compute A=L*Q, copying result to VT
3268* (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
3269*
3270 CALL dgelqf( m, n, a, lda, work( itau ),
3271 $ work( iwork ), lwork-iwork+1, ierr )
3272 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3273*
3274* Generate Q in VT
3275* (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
3276*
3277 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3278 $ work( iwork ), lwork-iwork+1, ierr )
3279*
3280* Copy L to WORK(IU), zeroing out above it
3281*
3282 CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3283 $ ldwrku )
3284 CALL dlaset( 'U', m-1, m-1, zero, zero,
3285 $ work( iu+ldwrku ), ldwrku )
3286 ie = itau
3287 itauq = ie + m
3288 itaup = itauq + m
3289 iwork = itaup + m
3290*
3291* Bidiagonalize L in WORK(IU), copying result to U
3292* (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
3293*
3294 CALL dgebrd( m, m, work( iu ), ldwrku, s,
3295 $ work( ie ), work( itauq ),
3296 $ work( itaup ), work( iwork ),
3297 $ lwork-iwork+1, ierr )
3298 CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3299 $ ldu )
3300*
3301* Generate right bidiagonalizing vectors in WORK(IU)
3302* (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
3303*
3304 CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3305 $ work( itaup ), work( iwork ),
3306 $ lwork-iwork+1, ierr )
3307*
3308* Generate left bidiagonalizing vectors in U
3309* (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
3310*
3311 CALL dorgbr( 'Q', m, m, m, u, ldu,
3312 $ work( itauq ),
3313 $ work( iwork ), lwork-iwork+1, ierr )
3314 iwork = ie + m
3315*
3316* Perform bidiagonal QR iteration, computing left
3317* singular vectors of L in U and computing right
3318* singular vectors of L in WORK(IU)
3319* (Workspace: need M*M + BDSPAC)
3320*
3321 CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3322 $ work( iu ), ldwrku, u, ldu, dum, 1,
3323 $ work( iwork ), info )
3324*
3325* Multiply right singular vectors of L in WORK(IU) by
3326* Q in VT, storing result in A
3327* (Workspace: need M*M)
3328*
3329 CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3330 $ ldwrku, vt, ldvt, zero, a, lda )
3331*
3332* Copy right singular vectors of A from A to VT
3333*
3334 CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3335*
3336 ELSE
3337*
3338* Insufficient workspace for a fast algorithm
3339*
3340 itau = 1
3341 iwork = itau + m
3342*
3343* Compute A=L*Q, copying result to VT
3344* (Workspace: need 2*M, prefer M + M*NB)
3345*
3346 CALL dgelqf( m, n, a, lda, work( itau ),
3347 $ work( iwork ), lwork-iwork+1, ierr )
3348 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3349*
3350* Generate Q in VT
3351* (Workspace: need M + N, prefer M + N*NB)
3352*
3353 CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3354 $ work( iwork ), lwork-iwork+1, ierr )
3355*
3356* Copy L to U, zeroing out above it
3357*
3358 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3359 CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1,
3360 $ 2 ),
3361 $ ldu )
3362 ie = itau
3363 itauq = ie + m
3364 itaup = itauq + m
3365 iwork = itaup + m
3366*
3367* Bidiagonalize L in U
3368* (Workspace: need 4*M, prefer 3*M + 2*M*NB)
3369*
3370 CALL dgebrd( m, m, u, ldu, s, work( ie ),
3371 $ work( itauq ), work( itaup ),
3372 $ work( iwork ), lwork-iwork+1, ierr )
3373*
3374* Multiply right bidiagonalizing vectors in U by Q
3375* in VT
3376* (Workspace: need 3*M + N, prefer 3*M + N*NB)
3377*
3378 CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3379 $ work( itaup ), vt, ldvt,
3380 $ work( iwork ), lwork-iwork+1, ierr )
3381*
3382* Generate left bidiagonalizing vectors in U
3383* (Workspace: need 4*M, prefer 3*M + M*NB)
3384*
3385 CALL dorgbr( 'Q', m, m, m, u, ldu,
3386 $ work( itauq ),
3387 $ work( iwork ), lwork-iwork+1, ierr )
3388 iwork = ie + m
3389*
3390* Perform bidiagonal QR iteration, computing left
3391* singular vectors of A in U and computing right
3392* singular vectors of A in VT
3393* (Workspace: need BDSPAC)
3394*
3395 CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3396 $ ldvt, u, ldu, dum, 1, work( iwork ),
3397 $ info )
3398*
3399 END IF
3400*
3401 END IF
3402*
3403 END IF
3404*
3405 ELSE
3406*
3407* N .LT. MNTHR
3408*
3409* Path 10t(N greater than M, but not much larger)
3410* Reduce to bidiagonal form without LQ decomposition
3411*
3412 ie = 1
3413 itauq = ie + m
3414 itaup = itauq + m
3415 iwork = itaup + m
3416*
3417* Bidiagonalize A
3418* (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
3419*
3420 CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3421 $ work( itaup ), work( iwork ), lwork-iwork+1,
3422 $ ierr )
3423 IF( wntuas ) THEN
3424*
3425* If left singular vectors desired in U, copy result to U
3426* and generate left bidiagonalizing vectors in U
3427* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3428*
3429 CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3430 CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3431 $ work( iwork ), lwork-iwork+1, ierr )
3432 END IF
3433 IF( wntvas ) THEN
3434*
3435* If right singular vectors desired in VT, copy result to
3436* VT and generate right bidiagonalizing vectors in VT
3437* (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
3438*
3439 CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3440 IF( wntva )
3441 $ nrvt = n
3442 IF( wntvs )
3443 $ nrvt = m
3444 CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3445 $ work( iwork ), lwork-iwork+1, ierr )
3446 END IF
3447 IF( wntuo ) THEN
3448*
3449* If left singular vectors desired in A, generate left
3450* bidiagonalizing vectors in A
3451* (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
3452*
3453 CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3454 $ work( iwork ), lwork-iwork+1, ierr )
3455 END IF
3456 IF( wntvo ) THEN
3457*
3458* If right singular vectors desired in A, generate right
3459* bidiagonalizing vectors in A
3460* (Workspace: need 4*M, prefer 3*M + M*NB)
3461*
3462 CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3463 $ work( iwork ), lwork-iwork+1, ierr )
3464 END IF
3465 iwork = ie + m
3466 IF( wntuas .OR. wntuo )
3467 $ nru = m
3468 IF( wntun )
3469 $ nru = 0
3470 IF( wntvas .OR. wntvo )
3471 $ ncvt = n
3472 IF( wntvn )
3473 $ ncvt = 0
3474 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3475*
3476* Perform bidiagonal QR iteration, if desired, computing
3477* left singular vectors in U and computing right singular
3478* vectors in VT
3479* (Workspace: need BDSPAC)
3480*
3481 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3482 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3483 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3484*
3485* Perform bidiagonal QR iteration, if desired, computing
3486* left singular vectors in U and computing right singular
3487* vectors in A
3488* (Workspace: need BDSPAC)
3489*
3490 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a,
3491 $ lda,
3492 $ u, ldu, dum, 1, work( iwork ), info )
3493 ELSE
3494*
3495* Perform bidiagonal QR iteration, if desired, computing
3496* left singular vectors in A and computing right singular
3497* vectors in VT
3498* (Workspace: need BDSPAC)
3499*
3500 CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3501 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3502 END IF
3503*
3504 END IF
3505*
3506 END IF
3507*
3508* If DBDSQR failed to converge, copy unconverged superdiagonals
3509* to WORK( 2:MINMN )
3510*
3511 IF( info.NE.0 ) THEN
3512 IF( ie.GT.2 ) THEN
3513 DO 50 i = 1, minmn - 1
3514 work( i+1 ) = work( i+ie-1 )
3515 50 CONTINUE
3516 END IF
3517 IF( ie.LT.2 ) THEN
3518 DO 60 i = minmn - 1, 1, -1
3519 work( i+1 ) = work( i+ie-1 )
3520 60 CONTINUE
3521 END IF
3522 END IF
3523*
3524* Undo scaling if necessary
3525*
3526 IF( iscl.EQ.1 ) THEN
3527 IF( anrm.GT.bignum )
3528 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3529 $ ierr )
3530 IF( info.NE.0 .AND. anrm.GT.bignum )
3531 $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3532 $ work( 2 ),
3533 $ minmn, ierr )
3534 IF( anrm.LT.smlnum )
3535 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3536 $ ierr )
3537 IF( info.NE.0 .AND. anrm.LT.smlnum )
3538 $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3539 $ work( 2 ),
3540 $ minmn, ierr )
3541 END IF
3542*
3543* Return optimal workspace in WORK(1)
3544*
3545 work( 1 ) = maxwrk
3546*
3547 RETURN
3548*
3549* End of DGESVD
3550*
3551 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine dbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
DBDSQR
Definition dbdsqr.f:241
subroutine dgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
DGEBRD
Definition dgebrd.f:204
subroutine dgelqf(m, n, a, lda, tau, work, lwork, info)
DGELQF
Definition dgelqf.f:142
subroutine dgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
DGEMM
Definition dgemm.f:188
subroutine dgeqrf(m, n, a, lda, tau, work, lwork, info)
DGEQRF
Definition dgeqrf.f:144
subroutine dgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info)
DGESVD computes the singular value decomposition (SVD) for GE matrices
Definition dgesvd.f:209
subroutine dlacpy(uplo, m, n, a, lda, b, ldb)
DLACPY copies all or part of one two-dimensional array to another.
Definition dlacpy.f:101
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:142
subroutine dlaset(uplo, m, n, alpha, beta, a, lda)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition dlaset.f:108
subroutine dorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
DORGBR
Definition dorgbr.f:156
subroutine dorglq(m, n, k, a, lda, tau, work, lwork, info)
DORGLQ
Definition dorglq.f:125
subroutine dorgqr(m, n, k, a, lda, tau, work, lwork, info)
DORGQR
Definition dorgqr.f:126
subroutine dormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
DORMBR
Definition dormbr.f:193