LAPACK 3.12.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
sgesvd.f
Go to the documentation of this file.
1*> \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> \htmlonly
9*> Download SGESVD + dependencies
10*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.f">
11*> [TGZ]</a>
12*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.f">
13*> [ZIP]</a>
14*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.f">
15*> [TXT]</a>
16*> \endhtmlonly
17*
18* Definition:
19* ===========
20*
21* SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22* WORK, LWORK, INFO )
23*
24* .. Scalar Arguments ..
25* CHARACTER JOBU, JOBVT
26* INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27* ..
28* .. Array Arguments ..
29* REAL A( LDA, * ), S( * ), U( LDU, * ),
30* $ VT( LDVT, * ), WORK( * )
31* ..
32*
33*
34*> \par Purpose:
35* =============
36*>
37*> \verbatim
38*>
39*> SGESVD computes the singular value decomposition (SVD) of a real
40*> M-by-N matrix A, optionally computing the left and/or right singular
41*> vectors. The SVD is written
42*>
43*> A = U * SIGMA * transpose(V)
44*>
45*> where SIGMA is an M-by-N matrix which is zero except for its
46*> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47*> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48*> are the singular values of A; they are real and non-negative, and
49*> are returned in descending order. The first min(m,n) columns of
50*> U and V are the left and right singular vectors of A.
51*>
52*> Note that the routine returns V**T, not V.
53*> \endverbatim
54*
55* Arguments:
56* ==========
57*
58*> \param[in] JOBU
59*> \verbatim
60*> JOBU is CHARACTER*1
61*> Specifies options for computing all or part of the matrix U:
62*> = 'A': all M columns of U are returned in array U:
63*> = 'S': the first min(m,n) columns of U (the left singular
64*> vectors) are returned in the array U;
65*> = 'O': the first min(m,n) columns of U (the left singular
66*> vectors) are overwritten on the array A;
67*> = 'N': no columns of U (no left singular vectors) are
68*> computed.
69*> \endverbatim
70*>
71*> \param[in] JOBVT
72*> \verbatim
73*> JOBVT is CHARACTER*1
74*> Specifies options for computing all or part of the matrix
75*> V**T:
76*> = 'A': all N rows of V**T are returned in the array VT;
77*> = 'S': the first min(m,n) rows of V**T (the right singular
78*> vectors) are returned in the array VT;
79*> = 'O': the first min(m,n) rows of V**T (the right singular
80*> vectors) are overwritten on the array A;
81*> = 'N': no rows of V**T (no right singular vectors) are
82*> computed.
83*>
84*> JOBVT and JOBU cannot both be 'O'.
85*> \endverbatim
86*>
87*> \param[in] M
88*> \verbatim
89*> M is INTEGER
90*> The number of rows of the input matrix A. M >= 0.
91*> \endverbatim
92*>
93*> \param[in] N
94*> \verbatim
95*> N is INTEGER
96*> The number of columns of the input matrix A. N >= 0.
97*> \endverbatim
98*>
99*> \param[in,out] A
100*> \verbatim
101*> A is REAL array, dimension (LDA,N)
102*> On entry, the M-by-N matrix A.
103*> On exit,
104*> if JOBU = 'O', A is overwritten with the first min(m,n)
105*> columns of U (the left singular vectors,
106*> stored columnwise);
107*> if JOBVT = 'O', A is overwritten with the first min(m,n)
108*> rows of V**T (the right singular vectors,
109*> stored rowwise);
110*> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111*> are destroyed.
112*> \endverbatim
113*>
114*> \param[in] LDA
115*> \verbatim
116*> LDA is INTEGER
117*> The leading dimension of the array A. LDA >= max(1,M).
118*> \endverbatim
119*>
120*> \param[out] S
121*> \verbatim
122*> S is REAL array, dimension (min(M,N))
123*> The singular values of A, sorted so that S(i) >= S(i+1).
124*> \endverbatim
125*>
126*> \param[out] U
127*> \verbatim
128*> U is REAL array, dimension (LDU,UCOL)
129*> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130*> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131*> if JOBU = 'S', U contains the first min(m,n) columns of U
132*> (the left singular vectors, stored columnwise);
133*> if JOBU = 'N' or 'O', U is not referenced.
134*> \endverbatim
135*>
136*> \param[in] LDU
137*> \verbatim
138*> LDU is INTEGER
139*> The leading dimension of the array U. LDU >= 1; if
140*> JOBU = 'S' or 'A', LDU >= M.
141*> \endverbatim
142*>
143*> \param[out] VT
144*> \verbatim
145*> VT is REAL array, dimension (LDVT,N)
146*> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147*> V**T;
148*> if JOBVT = 'S', VT contains the first min(m,n) rows of
149*> V**T (the right singular vectors, stored rowwise);
150*> if JOBVT = 'N' or 'O', VT is not referenced.
151*> \endverbatim
152*>
153*> \param[in] LDVT
154*> \verbatim
155*> LDVT is INTEGER
156*> The leading dimension of the array VT. LDVT >= 1; if
157*> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158*> \endverbatim
159*>
160*> \param[out] WORK
161*> \verbatim
162*> WORK is REAL array, dimension (MAX(1,LWORK))
163*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164*> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165*> superdiagonal elements of an upper bidiagonal matrix B
166*> whose diagonal is in S (not necessarily sorted). B
167*> satisfies A = U * B * VT, so it has the same singular values
168*> as A, and singular vectors related by U and VT.
169*> \endverbatim
170*>
171*> \param[in] LWORK
172*> \verbatim
173*> LWORK is INTEGER
174*> The dimension of the array WORK.
175*> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176*> - PATH 1 (M much larger than N, JOBU='N')
177*> - PATH 1t (N much larger than M, JOBVT='N')
178*> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179*> For good performance, LWORK should generally be larger.
180*>
181*> If LWORK = -1, then a workspace query is assumed; the routine
182*> only calculates the optimal size of the WORK array, returns
183*> this value as the first entry of the WORK array, and no error
184*> message related to LWORK is issued by XERBLA.
185*> \endverbatim
186*>
187*> \param[out] INFO
188*> \verbatim
189*> INFO is INTEGER
190*> = 0: successful exit.
191*> < 0: if INFO = -i, the i-th argument had an illegal value.
192*> > 0: if SBDSQR did not converge, INFO specifies how many
193*> superdiagonals of an intermediate bidiagonal form B
194*> did not converge to zero. See the description of WORK
195*> above for details.
196*> \endverbatim
197*
198* Authors:
199* ========
200*
201*> \author Univ. of Tennessee
202*> \author Univ. of California Berkeley
203*> \author Univ. of Colorado Denver
204*> \author NAG Ltd.
205*
206*> \ingroup gesvd
207*
208* =====================================================================
209 SUBROUTINE sgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
210 $ WORK, LWORK, INFO )
211*
212* -- LAPACK driver routine --
213* -- LAPACK is a software package provided by Univ. of Tennessee, --
214* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
215*
216* .. Scalar Arguments ..
217 CHARACTER JOBU, JOBVT
218 INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
219* ..
220* .. Array Arguments ..
221 REAL A( LDA, * ), S( * ), U( LDU, * ),
222 $ vt( ldvt, * ), work( * )
223* ..
224*
225* =====================================================================
226*
227* .. Parameters ..
228 REAL ZERO, ONE
229 parameter( zero = 0.0e0, one = 1.0e0 )
230* ..
231* .. Local Scalars ..
232 LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
233 $ wntva, wntvas, wntvn, wntvo, wntvs
234 INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
235 $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
236 $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
237 $ nrvt, wrkbl
238 INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
239 $ lwork_sgebrd, lwork_sorgbr_p, lwork_sorgbr_q,
240 $ lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m
241 REAL ANRM, BIGNUM, EPS, SMLNUM
242* ..
243* .. Local Arrays ..
244 REAL DUM( 1 )
245* ..
246* .. External Subroutines ..
247 EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
249 $ xerbla
250* ..
251* .. External Functions ..
252 LOGICAL LSAME
253 INTEGER ILAENV
254 REAL SLAMCH, SLANGE, SROUNDUP_LWORK
255 EXTERNAL lsame, ilaenv, slamch, slange, sroundup_lwork
256* ..
257* .. Intrinsic Functions ..
258 INTRINSIC max, min, sqrt
259* ..
260* .. Executable Statements ..
261*
262* Test the input arguments
263*
264 info = 0
265 minmn = min( m, n )
266 wntua = lsame( jobu, 'A' )
267 wntus = lsame( jobu, 'S' )
268 wntuas = wntua .OR. wntus
269 wntuo = lsame( jobu, 'O' )
270 wntun = lsame( jobu, 'N' )
271 wntva = lsame( jobvt, 'A' )
272 wntvs = lsame( jobvt, 'S' )
273 wntvas = wntva .OR. wntvs
274 wntvo = lsame( jobvt, 'O' )
275 wntvn = lsame( jobvt, 'N' )
276 lquery = ( lwork.EQ.-1 )
277*
278 IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
279 info = -1
280 ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
281 $ ( wntvo .AND. wntuo ) ) THEN
282 info = -2
283 ELSE IF( m.LT.0 ) THEN
284 info = -3
285 ELSE IF( n.LT.0 ) THEN
286 info = -4
287 ELSE IF( lda.LT.max( 1, m ) ) THEN
288 info = -6
289 ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
290 info = -9
291 ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
292 $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
293 info = -11
294 END IF
295*
296* Compute workspace
297* (Note: Comments in the code beginning "Workspace:" describe the
298* minimal amount of workspace needed at that point in the code,
299* as well as the preferred amount for good performance.
300* NB refers to the optimal block size for the immediately
301* following subroutine, as returned by ILAENV.)
302*
303 IF( info.EQ.0 ) THEN
304 minwrk = 1
305 maxwrk = 1
306 IF( m.GE.n .AND. minmn.GT.0 ) THEN
307*
308* Compute space needed for SBDSQR
309*
310 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
311 bdspac = 5*n
312* Compute space needed for SGEQRF
313 CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
314 lwork_sgeqrf = int( dum(1) )
315* Compute space needed for SORGQR
316 CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
317 lwork_sorgqr_n = int( dum(1) )
318 CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
319 lwork_sorgqr_m = int( dum(1) )
320* Compute space needed for SGEBRD
321 CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
322 $ dum(1), dum(1), -1, ierr )
323 lwork_sgebrd = int( dum(1) )
324* Compute space needed for SORGBR P
325 CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
326 $ dum(1), -1, ierr )
327 lwork_sorgbr_p = int( dum(1) )
328* Compute space needed for SORGBR Q
329 CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
330 $ dum(1), -1, ierr )
331 lwork_sorgbr_q = int( dum(1) )
332*
333 IF( m.GE.mnthr ) THEN
334 IF( wntun ) THEN
335*
336* Path 1 (M much larger than N, JOBU='N')
337*
338 maxwrk = n + lwork_sgeqrf
339 maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
340 IF( wntvo .OR. wntvas )
341 $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
342 maxwrk = max( maxwrk, bdspac )
343 minwrk = max( 4*n, bdspac )
344 ELSE IF( wntuo .AND. wntvn ) THEN
345*
346* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
347*
348 wrkbl = n + lwork_sgeqrf
349 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
350 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
351 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
352 wrkbl = max( wrkbl, bdspac )
353 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
354 minwrk = max( 3*n+m, bdspac )
355 ELSE IF( wntuo .AND. wntvas ) THEN
356*
357* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
358* 'A')
359*
360 wrkbl = n + lwork_sgeqrf
361 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
362 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
363 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
364 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
365 wrkbl = max( wrkbl, bdspac )
366 maxwrk = max( n*n+wrkbl, n*n+m*n+n )
367 minwrk = max( 3*n+m, bdspac )
368 ELSE IF( wntus .AND. wntvn ) THEN
369*
370* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
371*
372 wrkbl = n + lwork_sgeqrf
373 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
374 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
375 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
376 wrkbl = max( wrkbl, bdspac )
377 maxwrk = n*n + wrkbl
378 minwrk = max( 3*n+m, bdspac )
379 ELSE IF( wntus .AND. wntvo ) THEN
380*
381* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
382*
383 wrkbl = n + lwork_sgeqrf
384 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
385 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
386 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
387 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
388 wrkbl = max( wrkbl, bdspac )
389 maxwrk = 2*n*n + wrkbl
390 minwrk = max( 3*n+m, bdspac )
391 ELSE IF( wntus .AND. wntvas ) THEN
392*
393* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
394* 'A')
395*
396 wrkbl = n + lwork_sgeqrf
397 wrkbl = max( wrkbl, n+lwork_sorgqr_n )
398 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
399 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
400 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
401 wrkbl = max( wrkbl, bdspac )
402 maxwrk = n*n + wrkbl
403 minwrk = max( 3*n+m, bdspac )
404 ELSE IF( wntua .AND. wntvn ) THEN
405*
406* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
407*
408 wrkbl = n + lwork_sgeqrf
409 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
410 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
411 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
412 wrkbl = max( wrkbl, bdspac )
413 maxwrk = n*n + wrkbl
414 minwrk = max( 3*n+m, bdspac )
415 ELSE IF( wntua .AND. wntvo ) THEN
416*
417* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
418*
419 wrkbl = n + lwork_sgeqrf
420 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
421 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
422 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
423 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
424 wrkbl = max( wrkbl, bdspac )
425 maxwrk = 2*n*n + wrkbl
426 minwrk = max( 3*n+m, bdspac )
427 ELSE IF( wntua .AND. wntvas ) THEN
428*
429* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
430* 'A')
431*
432 wrkbl = n + lwork_sgeqrf
433 wrkbl = max( wrkbl, n+lwork_sorgqr_m )
434 wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
435 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
436 wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
437 wrkbl = max( wrkbl, bdspac )
438 maxwrk = n*n + wrkbl
439 minwrk = max( 3*n+m, bdspac )
440 END IF
441 ELSE
442*
443* Path 10 (M at least N, but not much larger)
444*
445 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
446 $ dum(1), dum(1), -1, ierr )
447 lwork_sgebrd = int( dum(1) )
448 maxwrk = 3*n + lwork_sgebrd
449 IF( wntus .OR. wntuo ) THEN
450 CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
451 $ dum(1), -1, ierr )
452 lwork_sorgbr_q = int( dum(1) )
453 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
454 END IF
455 IF( wntua ) THEN
456 CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
457 $ dum(1), -1, ierr )
458 lwork_sorgbr_q = int( dum(1) )
459 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
460 END IF
461 IF( .NOT.wntvn ) THEN
462 maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
463 END IF
464 maxwrk = max( maxwrk, bdspac )
465 minwrk = max( 3*n+m, bdspac )
466 END IF
467 ELSE IF( minmn.GT.0 ) THEN
468*
469* Compute space needed for SBDSQR
470*
471 mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
472 bdspac = 5*m
473* Compute space needed for SGELQF
474 CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
475 lwork_sgelqf = int( dum(1) )
476* Compute space needed for SORGLQ
477 CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
478 lwork_sorglq_n = int( dum(1) )
479 CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
480 lwork_sorglq_m = int( dum(1) )
481* Compute space needed for SGEBRD
482 CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
483 $ dum(1), dum(1), -1, ierr )
484 lwork_sgebrd = int( dum(1) )
485* Compute space needed for SORGBR P
486 CALL sorgbr( 'P', m, m, m, a, n, dum(1),
487 $ dum(1), -1, ierr )
488 lwork_sorgbr_p = int( dum(1) )
489* Compute space needed for SORGBR Q
490 CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
491 $ dum(1), -1, ierr )
492 lwork_sorgbr_q = int( dum(1) )
493 IF( n.GE.mnthr ) THEN
494 IF( wntvn ) THEN
495*
496* Path 1t(N much larger than M, JOBVT='N')
497*
498 maxwrk = m + lwork_sgelqf
499 maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
500 IF( wntuo .OR. wntuas )
501 $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
502 maxwrk = max( maxwrk, bdspac )
503 minwrk = max( 4*m, bdspac )
504 ELSE IF( wntvo .AND. wntun ) THEN
505*
506* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
507*
508 wrkbl = m + lwork_sgelqf
509 wrkbl = max( wrkbl, m+lwork_sorglq_m )
510 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
511 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
512 wrkbl = max( wrkbl, bdspac )
513 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
514 minwrk = max( 3*m+n, bdspac )
515 ELSE IF( wntvo .AND. wntuas ) THEN
516*
517* Path 3t(N much larger than M, JOBU='S' or 'A',
518* JOBVT='O')
519*
520 wrkbl = m + lwork_sgelqf
521 wrkbl = max( wrkbl, m+lwork_sorglq_m )
522 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
523 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
524 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
525 wrkbl = max( wrkbl, bdspac )
526 maxwrk = max( m*m+wrkbl, m*m+m*n+m )
527 minwrk = max( 3*m+n, bdspac )
528 ELSE IF( wntvs .AND. wntun ) THEN
529*
530* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
531*
532 wrkbl = m + lwork_sgelqf
533 wrkbl = max( wrkbl, m+lwork_sorglq_m )
534 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
535 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
536 wrkbl = max( wrkbl, bdspac )
537 maxwrk = m*m + wrkbl
538 minwrk = max( 3*m+n, bdspac )
539 ELSE IF( wntvs .AND. wntuo ) THEN
540*
541* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
542*
543 wrkbl = m + lwork_sgelqf
544 wrkbl = max( wrkbl, m+lwork_sorglq_m )
545 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
546 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
547 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
548 wrkbl = max( wrkbl, bdspac )
549 maxwrk = 2*m*m + wrkbl
550 minwrk = max( 3*m+n, bdspac )
551 maxwrk = max( maxwrk, minwrk )
552 ELSE IF( wntvs .AND. wntuas ) THEN
553*
554* Path 6t(N much larger than M, JOBU='S' or 'A',
555* JOBVT='S')
556*
557 wrkbl = m + lwork_sgelqf
558 wrkbl = max( wrkbl, m+lwork_sorglq_m )
559 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
560 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
561 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
562 wrkbl = max( wrkbl, bdspac )
563 maxwrk = m*m + wrkbl
564 minwrk = max( 3*m+n, bdspac )
565 ELSE IF( wntva .AND. wntun ) THEN
566*
567* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
568*
569 wrkbl = m + lwork_sgelqf
570 wrkbl = max( wrkbl, m+lwork_sorglq_n )
571 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
572 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
573 wrkbl = max( wrkbl, bdspac )
574 maxwrk = m*m + wrkbl
575 minwrk = max( 3*m+n, bdspac )
576 ELSE IF( wntva .AND. wntuo ) THEN
577*
578* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
579*
580 wrkbl = m + lwork_sgelqf
581 wrkbl = max( wrkbl, m+lwork_sorglq_n )
582 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
583 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
584 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
585 wrkbl = max( wrkbl, bdspac )
586 maxwrk = 2*m*m + wrkbl
587 minwrk = max( 3*m+n, bdspac )
588 ELSE IF( wntva .AND. wntuas ) THEN
589*
590* Path 9t(N much larger than M, JOBU='S' or 'A',
591* JOBVT='A')
592*
593 wrkbl = m + lwork_sgelqf
594 wrkbl = max( wrkbl, m+lwork_sorglq_n )
595 wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
596 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
597 wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
598 wrkbl = max( wrkbl, bdspac )
599 maxwrk = m*m + wrkbl
600 minwrk = max( 3*m+n, bdspac )
601 END IF
602 ELSE
603*
604* Path 10t(N greater than M, but not much larger)
605*
606 CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
607 $ dum(1), dum(1), -1, ierr )
608 lwork_sgebrd = int( dum(1) )
609 maxwrk = 3*m + lwork_sgebrd
610 IF( wntvs .OR. wntvo ) THEN
611* Compute space needed for SORGBR P
612 CALL sorgbr( 'P', m, n, m, a, n, dum(1),
613 $ dum(1), -1, ierr )
614 lwork_sorgbr_p = int( dum(1) )
615 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
616 END IF
617 IF( wntva ) THEN
618 CALL sorgbr( 'P', n, n, m, a, n, dum(1),
619 $ dum(1), -1, ierr )
620 lwork_sorgbr_p = int( dum(1) )
621 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
622 END IF
623 IF( .NOT.wntun ) THEN
624 maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
625 END IF
626 maxwrk = max( maxwrk, bdspac )
627 minwrk = max( 3*m+n, bdspac )
628 END IF
629 END IF
630 maxwrk = max( maxwrk, minwrk )
631 work( 1 ) = sroundup_lwork(maxwrk)
632*
633 IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
634 info = -13
635 END IF
636 END IF
637*
638 IF( info.NE.0 ) THEN
639 CALL xerbla( 'SGESVD', -info )
640 RETURN
641 ELSE IF( lquery ) THEN
642 RETURN
643 END IF
644*
645* Quick return if possible
646*
647 IF( m.EQ.0 .OR. n.EQ.0 ) THEN
648 RETURN
649 END IF
650*
651* Get machine constants
652*
653 eps = slamch( 'P' )
654 smlnum = sqrt( slamch( 'S' ) ) / eps
655 bignum = one / smlnum
656*
657* Scale A if max element outside range [SMLNUM,BIGNUM]
658*
659 anrm = slange( 'M', m, n, a, lda, dum )
660 iscl = 0
661 IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
662 iscl = 1
663 CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
664 ELSE IF( anrm.GT.bignum ) THEN
665 iscl = 1
666 CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
667 END IF
668*
669 IF( m.GE.n ) THEN
670*
671* A has at least as many rows as columns. If A has sufficiently
672* more rows than columns, first reduce using the QR
673* decomposition (if sufficient workspace available)
674*
675 IF( m.GE.mnthr ) THEN
676*
677 IF( wntun ) THEN
678*
679* Path 1 (M much larger than N, JOBU='N')
680* No left singular vectors to be computed
681*
682 itau = 1
683 iwork = itau + n
684*
685* Compute A=Q*R
686* (Workspace: need 2*N, prefer N+N*NB)
687*
688 CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
689 $ lwork-iwork+1, ierr )
690*
691* Zero out below R
692*
693 IF( n .GT. 1 ) THEN
694 CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
695 $ lda )
696 END IF
697 ie = 1
698 itauq = ie + n
699 itaup = itauq + n
700 iwork = itaup + n
701*
702* Bidiagonalize R in A
703* (Workspace: need 4*N, prefer 3*N+2*N*NB)
704*
705 CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
706 $ work( itaup ), work( iwork ), lwork-iwork+1,
707 $ ierr )
708 ncvt = 0
709 IF( wntvo .OR. wntvas ) THEN
710*
711* If right singular vectors desired, generate P'.
712* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
713*
714 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
715 $ work( iwork ), lwork-iwork+1, ierr )
716 ncvt = n
717 END IF
718 iwork = ie + n
719*
720* Perform bidiagonal QR iteration, computing right
721* singular vectors of A in A if desired
722* (Workspace: need BDSPAC)
723*
724 CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
725 $ dum, 1, dum, 1, work( iwork ), info )
726*
727* If right singular vectors desired in VT, copy them there
728*
729 IF( wntvas )
730 $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
731*
732 ELSE IF( wntuo .AND. wntvn ) THEN
733*
734* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
735* N left singular vectors to be overwritten on A and
736* no right singular vectors to be computed
737*
738 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
739*
740* Sufficient workspace for a fast algorithm
741*
742 ir = 1
743 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
744*
745* WORK(IU) is LDA by N, WORK(IR) is LDA by N
746*
747 ldwrku = lda
748 ldwrkr = lda
749 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
750*
751* WORK(IU) is LDA by N, WORK(IR) is N by N
752*
753 ldwrku = lda
754 ldwrkr = n
755 ELSE
756*
757* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
758*
759 ldwrku = ( lwork-n*n-n ) / n
760 ldwrkr = n
761 END IF
762 itau = ir + ldwrkr*n
763 iwork = itau + n
764*
765* Compute A=Q*R
766* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
767*
768 CALL sgeqrf( m, n, a, lda, work( itau ),
769 $ work( iwork ), lwork-iwork+1, ierr )
770*
771* Copy R to WORK(IR) and zero out below it
772*
773 CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
774 CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
775 $ ldwrkr )
776*
777* Generate Q in A
778* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
779*
780 CALL sorgqr( m, n, n, a, lda, work( itau ),
781 $ work( iwork ), lwork-iwork+1, ierr )
782 ie = itau
783 itauq = ie + n
784 itaup = itauq + n
785 iwork = itaup + n
786*
787* Bidiagonalize R in WORK(IR)
788* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
789*
790 CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
791 $ work( itauq ), work( itaup ),
792 $ work( iwork ), lwork-iwork+1, ierr )
793*
794* Generate left vectors bidiagonalizing R
795* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
796*
797 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
798 $ work( itauq ), work( iwork ),
799 $ lwork-iwork+1, ierr )
800 iwork = ie + n
801*
802* Perform bidiagonal QR iteration, computing left
803* singular vectors of R in WORK(IR)
804* (Workspace: need N*N+BDSPAC)
805*
806 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
807 $ work( ir ), ldwrkr, dum, 1,
808 $ work( iwork ), info )
809 iu = ie + n
810*
811* Multiply Q in A by left singular vectors of R in
812* WORK(IR), storing result in WORK(IU) and copying to A
813* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
814*
815 DO 10 i = 1, m, ldwrku
816 chunk = min( m-i+1, ldwrku )
817 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
818 $ lda, work( ir ), ldwrkr, zero,
819 $ work( iu ), ldwrku )
820 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
821 $ a( i, 1 ), lda )
822 10 CONTINUE
823*
824 ELSE
825*
826* Insufficient workspace for a fast algorithm
827*
828 ie = 1
829 itauq = ie + n
830 itaup = itauq + n
831 iwork = itaup + n
832*
833* Bidiagonalize A
834* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
835*
836 CALL sgebrd( m, n, a, lda, s, work( ie ),
837 $ work( itauq ), work( itaup ),
838 $ work( iwork ), lwork-iwork+1, ierr )
839*
840* Generate left vectors bidiagonalizing A
841* (Workspace: need 4*N, prefer 3*N+N*NB)
842*
843 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
844 $ work( iwork ), lwork-iwork+1, ierr )
845 iwork = ie + n
846*
847* Perform bidiagonal QR iteration, computing left
848* singular vectors of A in A
849* (Workspace: need BDSPAC)
850*
851 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
852 $ a, lda, dum, 1, work( iwork ), info )
853*
854 END IF
855*
856 ELSE IF( wntuo .AND. wntvas ) THEN
857*
858* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
859* N left singular vectors to be overwritten on A and
860* N right singular vectors to be computed in VT
861*
862 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
863*
864* Sufficient workspace for a fast algorithm
865*
866 ir = 1
867 IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
868*
869* WORK(IU) is LDA by N and WORK(IR) is LDA by N
870*
871 ldwrku = lda
872 ldwrkr = lda
873 ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
874*
875* WORK(IU) is LDA by N and WORK(IR) is N by N
876*
877 ldwrku = lda
878 ldwrkr = n
879 ELSE
880*
881* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
882*
883 ldwrku = ( lwork-n*n-n ) / n
884 ldwrkr = n
885 END IF
886 itau = ir + ldwrkr*n
887 iwork = itau + n
888*
889* Compute A=Q*R
890* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
891*
892 CALL sgeqrf( m, n, a, lda, work( itau ),
893 $ work( iwork ), lwork-iwork+1, ierr )
894*
895* Copy R to VT, zeroing out below it
896*
897 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
898 IF( n.GT.1 )
899 $ CALL slaset( 'L', n-1, n-1, zero, zero,
900 $ vt( 2, 1 ), ldvt )
901*
902* Generate Q in A
903* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
904*
905 CALL sorgqr( m, n, n, a, lda, work( itau ),
906 $ work( iwork ), lwork-iwork+1, ierr )
907 ie = itau
908 itauq = ie + n
909 itaup = itauq + n
910 iwork = itaup + n
911*
912* Bidiagonalize R in VT, copying result to WORK(IR)
913* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
914*
915 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
916 $ work( itauq ), work( itaup ),
917 $ work( iwork ), lwork-iwork+1, ierr )
918 CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
919*
920* Generate left vectors bidiagonalizing R in WORK(IR)
921* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
922*
923 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
924 $ work( itauq ), work( iwork ),
925 $ lwork-iwork+1, ierr )
926*
927* Generate right vectors bidiagonalizing R in VT
928* (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
929*
930 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
931 $ work( iwork ), lwork-iwork+1, ierr )
932 iwork = ie + n
933*
934* Perform bidiagonal QR iteration, computing left
935* singular vectors of R in WORK(IR) and computing right
936* singular vectors of R in VT
937* (Workspace: need N*N+BDSPAC)
938*
939 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
940 $ work( ir ), ldwrkr, dum, 1,
941 $ work( iwork ), info )
942 iu = ie + n
943*
944* Multiply Q in A by left singular vectors of R in
945* WORK(IR), storing result in WORK(IU) and copying to A
946* (Workspace: need N*N+2*N, prefer N*N+M*N+N)
947*
948 DO 20 i = 1, m, ldwrku
949 chunk = min( m-i+1, ldwrku )
950 CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
951 $ lda, work( ir ), ldwrkr, zero,
952 $ work( iu ), ldwrku )
953 CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
954 $ a( i, 1 ), lda )
955 20 CONTINUE
956*
957 ELSE
958*
959* Insufficient workspace for a fast algorithm
960*
961 itau = 1
962 iwork = itau + n
963*
964* Compute A=Q*R
965* (Workspace: need 2*N, prefer N+N*NB)
966*
967 CALL sgeqrf( m, n, a, lda, work( itau ),
968 $ work( iwork ), lwork-iwork+1, ierr )
969*
970* Copy R to VT, zeroing out below it
971*
972 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
973 IF( n.GT.1 )
974 $ CALL slaset( 'L', n-1, n-1, zero, zero,
975 $ vt( 2, 1 ), ldvt )
976*
977* Generate Q in A
978* (Workspace: need 2*N, prefer N+N*NB)
979*
980 CALL sorgqr( m, n, n, a, lda, work( itau ),
981 $ work( iwork ), lwork-iwork+1, ierr )
982 ie = itau
983 itauq = ie + n
984 itaup = itauq + n
985 iwork = itaup + n
986*
987* Bidiagonalize R in VT
988* (Workspace: need 4*N, prefer 3*N+2*N*NB)
989*
990 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
991 $ work( itauq ), work( itaup ),
992 $ work( iwork ), lwork-iwork+1, ierr )
993*
994* Multiply Q in A by left vectors bidiagonalizing R
995* (Workspace: need 3*N+M, prefer 3*N+M*NB)
996*
997 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
998 $ work( itauq ), a, lda, work( iwork ),
999 $ lwork-iwork+1, ierr )
1000*
1001* Generate right vectors bidiagonalizing R in VT
1002* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1003*
1004 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1005 $ work( iwork ), lwork-iwork+1, ierr )
1006 iwork = ie + n
1007*
1008* Perform bidiagonal QR iteration, computing left
1009* singular vectors of A in A and computing right
1010* singular vectors of A in VT
1011* (Workspace: need BDSPAC)
1012*
1013 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1014 $ a, lda, dum, 1, work( iwork ), info )
1015*
1016 END IF
1017*
1018 ELSE IF( wntus ) THEN
1019*
1020 IF( wntvn ) THEN
1021*
1022* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1023* N left singular vectors to be computed in U and
1024* no right singular vectors to be computed
1025*
1026 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1027*
1028* Sufficient workspace for a fast algorithm
1029*
1030 ir = 1
1031 IF( lwork.GE.wrkbl+lda*n ) THEN
1032*
1033* WORK(IR) is LDA by N
1034*
1035 ldwrkr = lda
1036 ELSE
1037*
1038* WORK(IR) is N by N
1039*
1040 ldwrkr = n
1041 END IF
1042 itau = ir + ldwrkr*n
1043 iwork = itau + n
1044*
1045* Compute A=Q*R
1046* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1047*
1048 CALL sgeqrf( m, n, a, lda, work( itau ),
1049 $ work( iwork ), lwork-iwork+1, ierr )
1050*
1051* Copy R to WORK(IR), zeroing out below it
1052*
1053 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1054 $ ldwrkr )
1055 CALL slaset( 'L', n-1, n-1, zero, zero,
1056 $ work( ir+1 ), ldwrkr )
1057*
1058* Generate Q in A
1059* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1060*
1061 CALL sorgqr( m, n, n, a, lda, work( itau ),
1062 $ work( iwork ), lwork-iwork+1, ierr )
1063 ie = itau
1064 itauq = ie + n
1065 itaup = itauq + n
1066 iwork = itaup + n
1067*
1068* Bidiagonalize R in WORK(IR)
1069* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1070*
1071 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1072 $ work( ie ), work( itauq ),
1073 $ work( itaup ), work( iwork ),
1074 $ lwork-iwork+1, ierr )
1075*
1076* Generate left vectors bidiagonalizing R in WORK(IR)
1077* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1078*
1079 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1080 $ work( itauq ), work( iwork ),
1081 $ lwork-iwork+1, ierr )
1082 iwork = ie + n
1083*
1084* Perform bidiagonal QR iteration, computing left
1085* singular vectors of R in WORK(IR)
1086* (Workspace: need N*N+BDSPAC)
1087*
1088 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1089 $ 1, work( ir ), ldwrkr, dum, 1,
1090 $ work( iwork ), info )
1091*
1092* Multiply Q in A by left singular vectors of R in
1093* WORK(IR), storing result in U
1094* (Workspace: need N*N)
1095*
1096 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1097 $ work( ir ), ldwrkr, zero, u, ldu )
1098*
1099 ELSE
1100*
1101* Insufficient workspace for a fast algorithm
1102*
1103 itau = 1
1104 iwork = itau + n
1105*
1106* Compute A=Q*R, copying result to U
1107* (Workspace: need 2*N, prefer N+N*NB)
1108*
1109 CALL sgeqrf( m, n, a, lda, work( itau ),
1110 $ work( iwork ), lwork-iwork+1, ierr )
1111 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1112*
1113* Generate Q in U
1114* (Workspace: need 2*N, prefer N+N*NB)
1115*
1116 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1117 $ work( iwork ), lwork-iwork+1, ierr )
1118 ie = itau
1119 itauq = ie + n
1120 itaup = itauq + n
1121 iwork = itaup + n
1122*
1123* Zero out below R in A
1124*
1125 IF( n .GT. 1 ) THEN
1126 CALL slaset( 'L', n-1, n-1, zero, zero,
1127 $ a( 2, 1 ), lda )
1128 END IF
1129*
1130* Bidiagonalize R in A
1131* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1132*
1133 CALL sgebrd( n, n, a, lda, s, work( ie ),
1134 $ work( itauq ), work( itaup ),
1135 $ work( iwork ), lwork-iwork+1, ierr )
1136*
1137* Multiply Q in U by left vectors bidiagonalizing R
1138* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1139*
1140 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1141 $ work( itauq ), u, ldu, work( iwork ),
1142 $ lwork-iwork+1, ierr )
1143 iwork = ie + n
1144*
1145* Perform bidiagonal QR iteration, computing left
1146* singular vectors of A in U
1147* (Workspace: need BDSPAC)
1148*
1149 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1150 $ 1, u, ldu, dum, 1, work( iwork ),
1151 $ info )
1152*
1153 END IF
1154*
1155 ELSE IF( wntvo ) THEN
1156*
1157* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1158* N left singular vectors to be computed in U and
1159* N right singular vectors to be overwritten on A
1160*
1161 IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1162*
1163* Sufficient workspace for a fast algorithm
1164*
1165 iu = 1
1166 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1167*
1168* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1169*
1170 ldwrku = lda
1171 ir = iu + ldwrku*n
1172 ldwrkr = lda
1173 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1174*
1175* WORK(IU) is LDA by N and WORK(IR) is N by N
1176*
1177 ldwrku = lda
1178 ir = iu + ldwrku*n
1179 ldwrkr = n
1180 ELSE
1181*
1182* WORK(IU) is N by N and WORK(IR) is N by N
1183*
1184 ldwrku = n
1185 ir = iu + ldwrku*n
1186 ldwrkr = n
1187 END IF
1188 itau = ir + ldwrkr*n
1189 iwork = itau + n
1190*
1191* Compute A=Q*R
1192* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1193*
1194 CALL sgeqrf( m, n, a, lda, work( itau ),
1195 $ work( iwork ), lwork-iwork+1, ierr )
1196*
1197* Copy R to WORK(IU), zeroing out below it
1198*
1199 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1200 $ ldwrku )
1201 CALL slaset( 'L', n-1, n-1, zero, zero,
1202 $ work( iu+1 ), ldwrku )
1203*
1204* Generate Q in A
1205* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1206*
1207 CALL sorgqr( m, n, n, a, lda, work( itau ),
1208 $ work( iwork ), lwork-iwork+1, ierr )
1209 ie = itau
1210 itauq = ie + n
1211 itaup = itauq + n
1212 iwork = itaup + n
1213*
1214* Bidiagonalize R in WORK(IU), copying result to
1215* WORK(IR)
1216* (Workspace: need 2*N*N+4*N,
1217* prefer 2*N*N+3*N+2*N*NB)
1218*
1219 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1220 $ work( ie ), work( itauq ),
1221 $ work( itaup ), work( iwork ),
1222 $ lwork-iwork+1, ierr )
1223 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1224 $ work( ir ), ldwrkr )
1225*
1226* Generate left bidiagonalizing vectors in WORK(IU)
1227* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1228*
1229 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1230 $ work( itauq ), work( iwork ),
1231 $ lwork-iwork+1, ierr )
1232*
1233* Generate right bidiagonalizing vectors in WORK(IR)
1234* (Workspace: need 2*N*N+4*N-1,
1235* prefer 2*N*N+3*N+(N-1)*NB)
1236*
1237 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1238 $ work( itaup ), work( iwork ),
1239 $ lwork-iwork+1, ierr )
1240 iwork = ie + n
1241*
1242* Perform bidiagonal QR iteration, computing left
1243* singular vectors of R in WORK(IU) and computing
1244* right singular vectors of R in WORK(IR)
1245* (Workspace: need 2*N*N+BDSPAC)
1246*
1247 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1248 $ work( ir ), ldwrkr, work( iu ),
1249 $ ldwrku, dum, 1, work( iwork ), info )
1250*
1251* Multiply Q in A by left singular vectors of R in
1252* WORK(IU), storing result in U
1253* (Workspace: need N*N)
1254*
1255 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1256 $ work( iu ), ldwrku, zero, u, ldu )
1257*
1258* Copy right singular vectors of R to A
1259* (Workspace: need N*N)
1260*
1261 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1262 $ lda )
1263*
1264 ELSE
1265*
1266* Insufficient workspace for a fast algorithm
1267*
1268 itau = 1
1269 iwork = itau + n
1270*
1271* Compute A=Q*R, copying result to U
1272* (Workspace: need 2*N, prefer N+N*NB)
1273*
1274 CALL sgeqrf( m, n, a, lda, work( itau ),
1275 $ work( iwork ), lwork-iwork+1, ierr )
1276 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1277*
1278* Generate Q in U
1279* (Workspace: need 2*N, prefer N+N*NB)
1280*
1281 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1282 $ work( iwork ), lwork-iwork+1, ierr )
1283 ie = itau
1284 itauq = ie + n
1285 itaup = itauq + n
1286 iwork = itaup + n
1287*
1288* Zero out below R in A
1289*
1290 IF( n .GT. 1 ) THEN
1291 CALL slaset( 'L', n-1, n-1, zero, zero,
1292 $ a( 2, 1 ), lda )
1293 END IF
1294*
1295* Bidiagonalize R in A
1296* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1297*
1298 CALL sgebrd( n, n, a, lda, s, work( ie ),
1299 $ work( itauq ), work( itaup ),
1300 $ work( iwork ), lwork-iwork+1, ierr )
1301*
1302* Multiply Q in U by left vectors bidiagonalizing R
1303* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1304*
1305 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1306 $ work( itauq ), u, ldu, work( iwork ),
1307 $ lwork-iwork+1, ierr )
1308*
1309* Generate right vectors bidiagonalizing R in A
1310* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1311*
1312 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1313 $ work( iwork ), lwork-iwork+1, ierr )
1314 iwork = ie + n
1315*
1316* Perform bidiagonal QR iteration, computing left
1317* singular vectors of A in U and computing right
1318* singular vectors of A in A
1319* (Workspace: need BDSPAC)
1320*
1321 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1322 $ lda, u, ldu, dum, 1, work( iwork ),
1323 $ info )
1324*
1325 END IF
1326*
1327 ELSE IF( wntvas ) THEN
1328*
1329* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1330* or 'A')
1331* N left singular vectors to be computed in U and
1332* N right singular vectors to be computed in VT
1333*
1334 IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1335*
1336* Sufficient workspace for a fast algorithm
1337*
1338 iu = 1
1339 IF( lwork.GE.wrkbl+lda*n ) THEN
1340*
1341* WORK(IU) is LDA by N
1342*
1343 ldwrku = lda
1344 ELSE
1345*
1346* WORK(IU) is N by N
1347*
1348 ldwrku = n
1349 END IF
1350 itau = iu + ldwrku*n
1351 iwork = itau + n
1352*
1353* Compute A=Q*R
1354* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1355*
1356 CALL sgeqrf( m, n, a, lda, work( itau ),
1357 $ work( iwork ), lwork-iwork+1, ierr )
1358*
1359* Copy R to WORK(IU), zeroing out below it
1360*
1361 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1362 $ ldwrku )
1363 CALL slaset( 'L', n-1, n-1, zero, zero,
1364 $ work( iu+1 ), ldwrku )
1365*
1366* Generate Q in A
1367* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1368*
1369 CALL sorgqr( m, n, n, a, lda, work( itau ),
1370 $ work( iwork ), lwork-iwork+1, ierr )
1371 ie = itau
1372 itauq = ie + n
1373 itaup = itauq + n
1374 iwork = itaup + n
1375*
1376* Bidiagonalize R in WORK(IU), copying result to VT
1377* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1378*
1379 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1380 $ work( ie ), work( itauq ),
1381 $ work( itaup ), work( iwork ),
1382 $ lwork-iwork+1, ierr )
1383 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1384 $ ldvt )
1385*
1386* Generate left bidiagonalizing vectors in WORK(IU)
1387* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1388*
1389 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1390 $ work( itauq ), work( iwork ),
1391 $ lwork-iwork+1, ierr )
1392*
1393* Generate right bidiagonalizing vectors in VT
1394* (Workspace: need N*N+4*N-1,
1395* prefer N*N+3*N+(N-1)*NB)
1396*
1397 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1398 $ work( iwork ), lwork-iwork+1, ierr )
1399 iwork = ie + n
1400*
1401* Perform bidiagonal QR iteration, computing left
1402* singular vectors of R in WORK(IU) and computing
1403* right singular vectors of R in VT
1404* (Workspace: need N*N+BDSPAC)
1405*
1406 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1407 $ ldvt, work( iu ), ldwrku, dum, 1,
1408 $ work( iwork ), info )
1409*
1410* Multiply Q in A by left singular vectors of R in
1411* WORK(IU), storing result in U
1412* (Workspace: need N*N)
1413*
1414 CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1415 $ work( iu ), ldwrku, zero, u, ldu )
1416*
1417 ELSE
1418*
1419* Insufficient workspace for a fast algorithm
1420*
1421 itau = 1
1422 iwork = itau + n
1423*
1424* Compute A=Q*R, copying result to U
1425* (Workspace: need 2*N, prefer N+N*NB)
1426*
1427 CALL sgeqrf( m, n, a, lda, work( itau ),
1428 $ work( iwork ), lwork-iwork+1, ierr )
1429 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1430*
1431* Generate Q in U
1432* (Workspace: need 2*N, prefer N+N*NB)
1433*
1434 CALL sorgqr( m, n, n, u, ldu, work( itau ),
1435 $ work( iwork ), lwork-iwork+1, ierr )
1436*
1437* Copy R to VT, zeroing out below it
1438*
1439 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1440 IF( n.GT.1 )
1441 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1442 $ vt( 2, 1 ), ldvt )
1443 ie = itau
1444 itauq = ie + n
1445 itaup = itauq + n
1446 iwork = itaup + n
1447*
1448* Bidiagonalize R in VT
1449* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1450*
1451 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1452 $ work( itauq ), work( itaup ),
1453 $ work( iwork ), lwork-iwork+1, ierr )
1454*
1455* Multiply Q in U by left bidiagonalizing vectors
1456* in VT
1457* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1458*
1459 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1460 $ work( itauq ), u, ldu, work( iwork ),
1461 $ lwork-iwork+1, ierr )
1462*
1463* Generate right bidiagonalizing vectors in VT
1464* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1465*
1466 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1467 $ work( iwork ), lwork-iwork+1, ierr )
1468 iwork = ie + n
1469*
1470* Perform bidiagonal QR iteration, computing left
1471* singular vectors of A in U and computing right
1472* singular vectors of A in VT
1473* (Workspace: need BDSPAC)
1474*
1475 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1476 $ ldvt, u, ldu, dum, 1, work( iwork ),
1477 $ info )
1478*
1479 END IF
1480*
1481 END IF
1482*
1483 ELSE IF( wntua ) THEN
1484*
1485 IF( wntvn ) THEN
1486*
1487* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1488* M left singular vectors to be computed in U and
1489* no right singular vectors to be computed
1490*
1491 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1492*
1493* Sufficient workspace for a fast algorithm
1494*
1495 ir = 1
1496 IF( lwork.GE.wrkbl+lda*n ) THEN
1497*
1498* WORK(IR) is LDA by N
1499*
1500 ldwrkr = lda
1501 ELSE
1502*
1503* WORK(IR) is N by N
1504*
1505 ldwrkr = n
1506 END IF
1507 itau = ir + ldwrkr*n
1508 iwork = itau + n
1509*
1510* Compute A=Q*R, copying result to U
1511* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1512*
1513 CALL sgeqrf( m, n, a, lda, work( itau ),
1514 $ work( iwork ), lwork-iwork+1, ierr )
1515 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1516*
1517* Copy R to WORK(IR), zeroing out below it
1518*
1519 CALL slacpy( 'U', n, n, a, lda, work( ir ),
1520 $ ldwrkr )
1521 CALL slaset( 'L', n-1, n-1, zero, zero,
1522 $ work( ir+1 ), ldwrkr )
1523*
1524* Generate Q in U
1525* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1526*
1527 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1528 $ work( iwork ), lwork-iwork+1, ierr )
1529 ie = itau
1530 itauq = ie + n
1531 itaup = itauq + n
1532 iwork = itaup + n
1533*
1534* Bidiagonalize R in WORK(IR)
1535* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1536*
1537 CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1538 $ work( ie ), work( itauq ),
1539 $ work( itaup ), work( iwork ),
1540 $ lwork-iwork+1, ierr )
1541*
1542* Generate left bidiagonalizing vectors in WORK(IR)
1543* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1544*
1545 CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1546 $ work( itauq ), work( iwork ),
1547 $ lwork-iwork+1, ierr )
1548 iwork = ie + n
1549*
1550* Perform bidiagonal QR iteration, computing left
1551* singular vectors of R in WORK(IR)
1552* (Workspace: need N*N+BDSPAC)
1553*
1554 CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1555 $ 1, work( ir ), ldwrkr, dum, 1,
1556 $ work( iwork ), info )
1557*
1558* Multiply Q in U by left singular vectors of R in
1559* WORK(IR), storing result in A
1560* (Workspace: need N*N)
1561*
1562 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1563 $ work( ir ), ldwrkr, zero, a, lda )
1564*
1565* Copy left singular vectors of A from A to U
1566*
1567 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1568*
1569 ELSE
1570*
1571* Insufficient workspace for a fast algorithm
1572*
1573 itau = 1
1574 iwork = itau + n
1575*
1576* Compute A=Q*R, copying result to U
1577* (Workspace: need 2*N, prefer N+N*NB)
1578*
1579 CALL sgeqrf( m, n, a, lda, work( itau ),
1580 $ work( iwork ), lwork-iwork+1, ierr )
1581 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1582*
1583* Generate Q in U
1584* (Workspace: need N+M, prefer N+M*NB)
1585*
1586 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1587 $ work( iwork ), lwork-iwork+1, ierr )
1588 ie = itau
1589 itauq = ie + n
1590 itaup = itauq + n
1591 iwork = itaup + n
1592*
1593* Zero out below R in A
1594*
1595 IF( n .GT. 1 ) THEN
1596 CALL slaset( 'L', n-1, n-1, zero, zero,
1597 $ a( 2, 1 ), lda )
1598 END IF
1599*
1600* Bidiagonalize R in A
1601* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1602*
1603 CALL sgebrd( n, n, a, lda, s, work( ie ),
1604 $ work( itauq ), work( itaup ),
1605 $ work( iwork ), lwork-iwork+1, ierr )
1606*
1607* Multiply Q in U by left bidiagonalizing vectors
1608* in A
1609* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1610*
1611 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1612 $ work( itauq ), u, ldu, work( iwork ),
1613 $ lwork-iwork+1, ierr )
1614 iwork = ie + n
1615*
1616* Perform bidiagonal QR iteration, computing left
1617* singular vectors of A in U
1618* (Workspace: need BDSPAC)
1619*
1620 CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1621 $ 1, u, ldu, dum, 1, work( iwork ),
1622 $ info )
1623*
1624 END IF
1625*
1626 ELSE IF( wntvo ) THEN
1627*
1628* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1629* M left singular vectors to be computed in U and
1630* N right singular vectors to be overwritten on A
1631*
1632 IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1633*
1634* Sufficient workspace for a fast algorithm
1635*
1636 iu = 1
1637 IF( lwork.GE.wrkbl+2*lda*n ) THEN
1638*
1639* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1640*
1641 ldwrku = lda
1642 ir = iu + ldwrku*n
1643 ldwrkr = lda
1644 ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1645*
1646* WORK(IU) is LDA by N and WORK(IR) is N by N
1647*
1648 ldwrku = lda
1649 ir = iu + ldwrku*n
1650 ldwrkr = n
1651 ELSE
1652*
1653* WORK(IU) is N by N and WORK(IR) is N by N
1654*
1655 ldwrku = n
1656 ir = iu + ldwrku*n
1657 ldwrkr = n
1658 END IF
1659 itau = ir + ldwrkr*n
1660 iwork = itau + n
1661*
1662* Compute A=Q*R, copying result to U
1663* (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1664*
1665 CALL sgeqrf( m, n, a, lda, work( itau ),
1666 $ work( iwork ), lwork-iwork+1, ierr )
1667 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1668*
1669* Generate Q in U
1670* (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1671*
1672 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1673 $ work( iwork ), lwork-iwork+1, ierr )
1674*
1675* Copy R to WORK(IU), zeroing out below it
1676*
1677 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1678 $ ldwrku )
1679 CALL slaset( 'L', n-1, n-1, zero, zero,
1680 $ work( iu+1 ), ldwrku )
1681 ie = itau
1682 itauq = ie + n
1683 itaup = itauq + n
1684 iwork = itaup + n
1685*
1686* Bidiagonalize R in WORK(IU), copying result to
1687* WORK(IR)
1688* (Workspace: need 2*N*N+4*N,
1689* prefer 2*N*N+3*N+2*N*NB)
1690*
1691 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1692 $ work( ie ), work( itauq ),
1693 $ work( itaup ), work( iwork ),
1694 $ lwork-iwork+1, ierr )
1695 CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1696 $ work( ir ), ldwrkr )
1697*
1698* Generate left bidiagonalizing vectors in WORK(IU)
1699* (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1700*
1701 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1702 $ work( itauq ), work( iwork ),
1703 $ lwork-iwork+1, ierr )
1704*
1705* Generate right bidiagonalizing vectors in WORK(IR)
1706* (Workspace: need 2*N*N+4*N-1,
1707* prefer 2*N*N+3*N+(N-1)*NB)
1708*
1709 CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1710 $ work( itaup ), work( iwork ),
1711 $ lwork-iwork+1, ierr )
1712 iwork = ie + n
1713*
1714* Perform bidiagonal QR iteration, computing left
1715* singular vectors of R in WORK(IU) and computing
1716* right singular vectors of R in WORK(IR)
1717* (Workspace: need 2*N*N+BDSPAC)
1718*
1719 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1720 $ work( ir ), ldwrkr, work( iu ),
1721 $ ldwrku, dum, 1, work( iwork ), info )
1722*
1723* Multiply Q in U by left singular vectors of R in
1724* WORK(IU), storing result in A
1725* (Workspace: need N*N)
1726*
1727 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1728 $ work( iu ), ldwrku, zero, a, lda )
1729*
1730* Copy left singular vectors of A from A to U
1731*
1732 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1733*
1734* Copy right singular vectors of R from WORK(IR) to A
1735*
1736 CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1737 $ lda )
1738*
1739 ELSE
1740*
1741* Insufficient workspace for a fast algorithm
1742*
1743 itau = 1
1744 iwork = itau + n
1745*
1746* Compute A=Q*R, copying result to U
1747* (Workspace: need 2*N, prefer N+N*NB)
1748*
1749 CALL sgeqrf( m, n, a, lda, work( itau ),
1750 $ work( iwork ), lwork-iwork+1, ierr )
1751 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1752*
1753* Generate Q in U
1754* (Workspace: need N+M, prefer N+M*NB)
1755*
1756 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1757 $ work( iwork ), lwork-iwork+1, ierr )
1758 ie = itau
1759 itauq = ie + n
1760 itaup = itauq + n
1761 iwork = itaup + n
1762*
1763* Zero out below R in A
1764*
1765 IF( n .GT. 1 ) THEN
1766 CALL slaset( 'L', n-1, n-1, zero, zero,
1767 $ a( 2, 1 ), lda )
1768 END IF
1769*
1770* Bidiagonalize R in A
1771* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1772*
1773 CALL sgebrd( n, n, a, lda, s, work( ie ),
1774 $ work( itauq ), work( itaup ),
1775 $ work( iwork ), lwork-iwork+1, ierr )
1776*
1777* Multiply Q in U by left bidiagonalizing vectors
1778* in A
1779* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1780*
1781 CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1782 $ work( itauq ), u, ldu, work( iwork ),
1783 $ lwork-iwork+1, ierr )
1784*
1785* Generate right bidiagonalizing vectors in A
1786* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1787*
1788 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1789 $ work( iwork ), lwork-iwork+1, ierr )
1790 iwork = ie + n
1791*
1792* Perform bidiagonal QR iteration, computing left
1793* singular vectors of A in U and computing right
1794* singular vectors of A in A
1795* (Workspace: need BDSPAC)
1796*
1797 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1798 $ lda, u, ldu, dum, 1, work( iwork ),
1799 $ info )
1800*
1801 END IF
1802*
1803 ELSE IF( wntvas ) THEN
1804*
1805* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1806* or 'A')
1807* M left singular vectors to be computed in U and
1808* N right singular vectors to be computed in VT
1809*
1810 IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1811*
1812* Sufficient workspace for a fast algorithm
1813*
1814 iu = 1
1815 IF( lwork.GE.wrkbl+lda*n ) THEN
1816*
1817* WORK(IU) is LDA by N
1818*
1819 ldwrku = lda
1820 ELSE
1821*
1822* WORK(IU) is N by N
1823*
1824 ldwrku = n
1825 END IF
1826 itau = iu + ldwrku*n
1827 iwork = itau + n
1828*
1829* Compute A=Q*R, copying result to U
1830* (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1831*
1832 CALL sgeqrf( m, n, a, lda, work( itau ),
1833 $ work( iwork ), lwork-iwork+1, ierr )
1834 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1835*
1836* Generate Q in U
1837* (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1838*
1839 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1840 $ work( iwork ), lwork-iwork+1, ierr )
1841*
1842* Copy R to WORK(IU), zeroing out below it
1843*
1844 CALL slacpy( 'U', n, n, a, lda, work( iu ),
1845 $ ldwrku )
1846 CALL slaset( 'L', n-1, n-1, zero, zero,
1847 $ work( iu+1 ), ldwrku )
1848 ie = itau
1849 itauq = ie + n
1850 itaup = itauq + n
1851 iwork = itaup + n
1852*
1853* Bidiagonalize R in WORK(IU), copying result to VT
1854* (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1855*
1856 CALL sgebrd( n, n, work( iu ), ldwrku, s,
1857 $ work( ie ), work( itauq ),
1858 $ work( itaup ), work( iwork ),
1859 $ lwork-iwork+1, ierr )
1860 CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1861 $ ldvt )
1862*
1863* Generate left bidiagonalizing vectors in WORK(IU)
1864* (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1865*
1866 CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1867 $ work( itauq ), work( iwork ),
1868 $ lwork-iwork+1, ierr )
1869*
1870* Generate right bidiagonalizing vectors in VT
1871* (Workspace: need N*N+4*N-1,
1872* prefer N*N+3*N+(N-1)*NB)
1873*
1874 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1875 $ work( iwork ), lwork-iwork+1, ierr )
1876 iwork = ie + n
1877*
1878* Perform bidiagonal QR iteration, computing left
1879* singular vectors of R in WORK(IU) and computing
1880* right singular vectors of R in VT
1881* (Workspace: need N*N+BDSPAC)
1882*
1883 CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1884 $ ldvt, work( iu ), ldwrku, dum, 1,
1885 $ work( iwork ), info )
1886*
1887* Multiply Q in U by left singular vectors of R in
1888* WORK(IU), storing result in A
1889* (Workspace: need N*N)
1890*
1891 CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1892 $ work( iu ), ldwrku, zero, a, lda )
1893*
1894* Copy left singular vectors of A from A to U
1895*
1896 CALL slacpy( 'F', m, n, a, lda, u, ldu )
1897*
1898 ELSE
1899*
1900* Insufficient workspace for a fast algorithm
1901*
1902 itau = 1
1903 iwork = itau + n
1904*
1905* Compute A=Q*R, copying result to U
1906* (Workspace: need 2*N, prefer N+N*NB)
1907*
1908 CALL sgeqrf( m, n, a, lda, work( itau ),
1909 $ work( iwork ), lwork-iwork+1, ierr )
1910 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1911*
1912* Generate Q in U
1913* (Workspace: need N+M, prefer N+M*NB)
1914*
1915 CALL sorgqr( m, m, n, u, ldu, work( itau ),
1916 $ work( iwork ), lwork-iwork+1, ierr )
1917*
1918* Copy R from A to VT, zeroing out below it
1919*
1920 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1921 IF( n.GT.1 )
1922 $ CALL slaset( 'L', n-1, n-1, zero, zero,
1923 $ vt( 2, 1 ), ldvt )
1924 ie = itau
1925 itauq = ie + n
1926 itaup = itauq + n
1927 iwork = itaup + n
1928*
1929* Bidiagonalize R in VT
1930* (Workspace: need 4*N, prefer 3*N+2*N*NB)
1931*
1932 CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1933 $ work( itauq ), work( itaup ),
1934 $ work( iwork ), lwork-iwork+1, ierr )
1935*
1936* Multiply Q in U by left bidiagonalizing vectors
1937* in VT
1938* (Workspace: need 3*N+M, prefer 3*N+M*NB)
1939*
1940 CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1941 $ work( itauq ), u, ldu, work( iwork ),
1942 $ lwork-iwork+1, ierr )
1943*
1944* Generate right bidiagonalizing vectors in VT
1945* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1946*
1947 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1948 $ work( iwork ), lwork-iwork+1, ierr )
1949 iwork = ie + n
1950*
1951* Perform bidiagonal QR iteration, computing left
1952* singular vectors of A in U and computing right
1953* singular vectors of A in VT
1954* (Workspace: need BDSPAC)
1955*
1956 CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1957 $ ldvt, u, ldu, dum, 1, work( iwork ),
1958 $ info )
1959*
1960 END IF
1961*
1962 END IF
1963*
1964 END IF
1965*
1966 ELSE
1967*
1968* M .LT. MNTHR
1969*
1970* Path 10 (M at least N, but not much larger)
1971* Reduce to bidiagonal form without QR decomposition
1972*
1973 ie = 1
1974 itauq = ie + n
1975 itaup = itauq + n
1976 iwork = itaup + n
1977*
1978* Bidiagonalize A
1979* (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1980*
1981 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1982 $ work( itaup ), work( iwork ), lwork-iwork+1,
1983 $ ierr )
1984 IF( wntuas ) THEN
1985*
1986* If left singular vectors desired in U, copy result to U
1987* and generate left bidiagonalizing vectors in U
1988* (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1989*
1990 CALL slacpy( 'L', m, n, a, lda, u, ldu )
1991 IF( wntus )
1992 $ ncu = n
1993 IF( wntua )
1994 $ ncu = m
1995 CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1996 $ work( iwork ), lwork-iwork+1, ierr )
1997 END IF
1998 IF( wntvas ) THEN
1999*
2000* If right singular vectors desired in VT, copy result to
2001* VT and generate right bidiagonalizing vectors in VT
2002* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2003*
2004 CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2005 CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2006 $ work( iwork ), lwork-iwork+1, ierr )
2007 END IF
2008 IF( wntuo ) THEN
2009*
2010* If left singular vectors desired in A, generate left
2011* bidiagonalizing vectors in A
2012* (Workspace: need 4*N, prefer 3*N+N*NB)
2013*
2014 CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2015 $ work( iwork ), lwork-iwork+1, ierr )
2016 END IF
2017 IF( wntvo ) THEN
2018*
2019* If right singular vectors desired in A, generate right
2020* bidiagonalizing vectors in A
2021* (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2022*
2023 CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2024 $ work( iwork ), lwork-iwork+1, ierr )
2025 END IF
2026 iwork = ie + n
2027 IF( wntuas .OR. wntuo )
2028 $ nru = m
2029 IF( wntun )
2030 $ nru = 0
2031 IF( wntvas .OR. wntvo )
2032 $ ncvt = n
2033 IF( wntvn )
2034 $ ncvt = 0
2035 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2036*
2037* Perform bidiagonal QR iteration, if desired, computing
2038* left singular vectors in U and computing right singular
2039* vectors in VT
2040* (Workspace: need BDSPAC)
2041*
2042 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2043 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2044 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2045*
2046* Perform bidiagonal QR iteration, if desired, computing
2047* left singular vectors in U and computing right singular
2048* vectors in A
2049* (Workspace: need BDSPAC)
2050*
2051 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2052 $ u, ldu, dum, 1, work( iwork ), info )
2053 ELSE
2054*
2055* Perform bidiagonal QR iteration, if desired, computing
2056* left singular vectors in A and computing right singular
2057* vectors in VT
2058* (Workspace: need BDSPAC)
2059*
2060 CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2061 $ ldvt, a, lda, dum, 1, work( iwork ), info )
2062 END IF
2063*
2064 END IF
2065*
2066 ELSE
2067*
2068* A has more columns than rows. If A has sufficiently more
2069* columns than rows, first reduce using the LQ decomposition (if
2070* sufficient workspace available)
2071*
2072 IF( n.GE.mnthr ) THEN
2073*
2074 IF( wntvn ) THEN
2075*
2076* Path 1t(N much larger than M, JOBVT='N')
2077* No right singular vectors to be computed
2078*
2079 itau = 1
2080 iwork = itau + m
2081*
2082* Compute A=L*Q
2083* (Workspace: need 2*M, prefer M+M*NB)
2084*
2085 CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2086 $ lwork-iwork+1, ierr )
2087*
2088* Zero out above L
2089*
2090 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2091 ie = 1
2092 itauq = ie + m
2093 itaup = itauq + m
2094 iwork = itaup + m
2095*
2096* Bidiagonalize L in A
2097* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2098*
2099 CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2100 $ work( itaup ), work( iwork ), lwork-iwork+1,
2101 $ ierr )
2102 IF( wntuo .OR. wntuas ) THEN
2103*
2104* If left singular vectors desired, generate Q
2105* (Workspace: need 4*M, prefer 3*M+M*NB)
2106*
2107 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2108 $ work( iwork ), lwork-iwork+1, ierr )
2109 END IF
2110 iwork = ie + m
2111 nru = 0
2112 IF( wntuo .OR. wntuas )
2113 $ nru = m
2114*
2115* Perform bidiagonal QR iteration, computing left singular
2116* vectors of A in A if desired
2117* (Workspace: need BDSPAC)
2118*
2119 CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2120 $ lda, dum, 1, work( iwork ), info )
2121*
2122* If left singular vectors desired in U, copy them there
2123*
2124 IF( wntuas )
2125 $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2126*
2127 ELSE IF( wntvo .AND. wntun ) THEN
2128*
2129* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2130* M right singular vectors to be overwritten on A and
2131* no left singular vectors to be computed
2132*
2133 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2134*
2135* Sufficient workspace for a fast algorithm
2136*
2137 ir = 1
2138 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2139*
2140* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2141*
2142 ldwrku = lda
2143 chunk = n
2144 ldwrkr = lda
2145 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2146*
2147* WORK(IU) is LDA by N and WORK(IR) is M by M
2148*
2149 ldwrku = lda
2150 chunk = n
2151 ldwrkr = m
2152 ELSE
2153*
2154* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2155*
2156 ldwrku = m
2157 chunk = ( lwork-m*m-m ) / m
2158 ldwrkr = m
2159 END IF
2160 itau = ir + ldwrkr*m
2161 iwork = itau + m
2162*
2163* Compute A=L*Q
2164* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2165*
2166 CALL sgelqf( m, n, a, lda, work( itau ),
2167 $ work( iwork ), lwork-iwork+1, ierr )
2168*
2169* Copy L to WORK(IR) and zero out above it
2170*
2171 CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2172 CALL slaset( 'U', m-1, m-1, zero, zero,
2173 $ work( ir+ldwrkr ), ldwrkr )
2174*
2175* Generate Q in A
2176* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2177*
2178 CALL sorglq( m, n, m, a, lda, work( itau ),
2179 $ work( iwork ), lwork-iwork+1, ierr )
2180 ie = itau
2181 itauq = ie + m
2182 itaup = itauq + m
2183 iwork = itaup + m
2184*
2185* Bidiagonalize L in WORK(IR)
2186* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2187*
2188 CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2189 $ work( itauq ), work( itaup ),
2190 $ work( iwork ), lwork-iwork+1, ierr )
2191*
2192* Generate right vectors bidiagonalizing L
2193* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2194*
2195 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2196 $ work( itaup ), work( iwork ),
2197 $ lwork-iwork+1, ierr )
2198 iwork = ie + m
2199*
2200* Perform bidiagonal QR iteration, computing right
2201* singular vectors of L in WORK(IR)
2202* (Workspace: need M*M+BDSPAC)
2203*
2204 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2205 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2206 $ work( iwork ), info )
2207 iu = ie + m
2208*
2209* Multiply right singular vectors of L in WORK(IR) by Q
2210* in A, storing result in WORK(IU) and copying to A
2211* (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2212*
2213 DO 30 i = 1, n, chunk
2214 blk = min( n-i+1, chunk )
2215 CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2216 $ ldwrkr, a( 1, i ), lda, zero,
2217 $ work( iu ), ldwrku )
2218 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2219 $ a( 1, i ), lda )
2220 30 CONTINUE
2221*
2222 ELSE
2223*
2224* Insufficient workspace for a fast algorithm
2225*
2226 ie = 1
2227 itauq = ie + m
2228 itaup = itauq + m
2229 iwork = itaup + m
2230*
2231* Bidiagonalize A
2232* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2233*
2234 CALL sgebrd( m, n, a, lda, s, work( ie ),
2235 $ work( itauq ), work( itaup ),
2236 $ work( iwork ), lwork-iwork+1, ierr )
2237*
2238* Generate right vectors bidiagonalizing A
2239* (Workspace: need 4*M, prefer 3*M+M*NB)
2240*
2241 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2242 $ work( iwork ), lwork-iwork+1, ierr )
2243 iwork = ie + m
2244*
2245* Perform bidiagonal QR iteration, computing right
2246* singular vectors of A in A
2247* (Workspace: need BDSPAC)
2248*
2249 CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2250 $ dum, 1, dum, 1, work( iwork ), info )
2251*
2252 END IF
2253*
2254 ELSE IF( wntvo .AND. wntuas ) THEN
2255*
2256* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2257* M right singular vectors to be overwritten on A and
2258* M left singular vectors to be computed in U
2259*
2260 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2261*
2262* Sufficient workspace for a fast algorithm
2263*
2264 ir = 1
2265 IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2266*
2267* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2268*
2269 ldwrku = lda
2270 chunk = n
2271 ldwrkr = lda
2272 ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2273*
2274* WORK(IU) is LDA by N and WORK(IR) is M by M
2275*
2276 ldwrku = lda
2277 chunk = n
2278 ldwrkr = m
2279 ELSE
2280*
2281* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2282*
2283 ldwrku = m
2284 chunk = ( lwork-m*m-m ) / m
2285 ldwrkr = m
2286 END IF
2287 itau = ir + ldwrkr*m
2288 iwork = itau + m
2289*
2290* Compute A=L*Q
2291* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2292*
2293 CALL sgelqf( m, n, a, lda, work( itau ),
2294 $ work( iwork ), lwork-iwork+1, ierr )
2295*
2296* Copy L to U, zeroing about above it
2297*
2298 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2299 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2300 $ ldu )
2301*
2302* Generate Q in A
2303* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2304*
2305 CALL sorglq( m, n, m, a, lda, work( itau ),
2306 $ work( iwork ), lwork-iwork+1, ierr )
2307 ie = itau
2308 itauq = ie + m
2309 itaup = itauq + m
2310 iwork = itaup + m
2311*
2312* Bidiagonalize L in U, copying result to WORK(IR)
2313* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2314*
2315 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2316 $ work( itauq ), work( itaup ),
2317 $ work( iwork ), lwork-iwork+1, ierr )
2318 CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2319*
2320* Generate right vectors bidiagonalizing L in WORK(IR)
2321* (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2322*
2323 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2324 $ work( itaup ), work( iwork ),
2325 $ lwork-iwork+1, ierr )
2326*
2327* Generate left vectors bidiagonalizing L in U
2328* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2329*
2330 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2331 $ work( iwork ), lwork-iwork+1, ierr )
2332 iwork = ie + m
2333*
2334* Perform bidiagonal QR iteration, computing left
2335* singular vectors of L in U, and computing right
2336* singular vectors of L in WORK(IR)
2337* (Workspace: need M*M+BDSPAC)
2338*
2339 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2340 $ work( ir ), ldwrkr, u, ldu, dum, 1,
2341 $ work( iwork ), info )
2342 iu = ie + m
2343*
2344* Multiply right singular vectors of L in WORK(IR) by Q
2345* in A, storing result in WORK(IU) and copying to A
2346* (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2347*
2348 DO 40 i = 1, n, chunk
2349 blk = min( n-i+1, chunk )
2350 CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2351 $ ldwrkr, a( 1, i ), lda, zero,
2352 $ work( iu ), ldwrku )
2353 CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2354 $ a( 1, i ), lda )
2355 40 CONTINUE
2356*
2357 ELSE
2358*
2359* Insufficient workspace for a fast algorithm
2360*
2361 itau = 1
2362 iwork = itau + m
2363*
2364* Compute A=L*Q
2365* (Workspace: need 2*M, prefer M+M*NB)
2366*
2367 CALL sgelqf( m, n, a, lda, work( itau ),
2368 $ work( iwork ), lwork-iwork+1, ierr )
2369*
2370* Copy L to U, zeroing out above it
2371*
2372 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2373 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2374 $ ldu )
2375*
2376* Generate Q in A
2377* (Workspace: need 2*M, prefer M+M*NB)
2378*
2379 CALL sorglq( m, n, m, a, lda, work( itau ),
2380 $ work( iwork ), lwork-iwork+1, ierr )
2381 ie = itau
2382 itauq = ie + m
2383 itaup = itauq + m
2384 iwork = itaup + m
2385*
2386* Bidiagonalize L in U
2387* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2388*
2389 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2390 $ work( itauq ), work( itaup ),
2391 $ work( iwork ), lwork-iwork+1, ierr )
2392*
2393* Multiply right vectors bidiagonalizing L by Q in A
2394* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2395*
2396 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2397 $ work( itaup ), a, lda, work( iwork ),
2398 $ lwork-iwork+1, ierr )
2399*
2400* Generate left vectors bidiagonalizing L in U
2401* (Workspace: need 4*M, prefer 3*M+M*NB)
2402*
2403 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2404 $ work( iwork ), lwork-iwork+1, ierr )
2405 iwork = ie + m
2406*
2407* Perform bidiagonal QR iteration, computing left
2408* singular vectors of A in U and computing right
2409* singular vectors of A in A
2410* (Workspace: need BDSPAC)
2411*
2412 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2413 $ u, ldu, dum, 1, work( iwork ), info )
2414*
2415 END IF
2416*
2417 ELSE IF( wntvs ) THEN
2418*
2419 IF( wntun ) THEN
2420*
2421* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2422* M right singular vectors to be computed in VT and
2423* no left singular vectors to be computed
2424*
2425 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2426*
2427* Sufficient workspace for a fast algorithm
2428*
2429 ir = 1
2430 IF( lwork.GE.wrkbl+lda*m ) THEN
2431*
2432* WORK(IR) is LDA by M
2433*
2434 ldwrkr = lda
2435 ELSE
2436*
2437* WORK(IR) is M by M
2438*
2439 ldwrkr = m
2440 END IF
2441 itau = ir + ldwrkr*m
2442 iwork = itau + m
2443*
2444* Compute A=L*Q
2445* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2446*
2447 CALL sgelqf( m, n, a, lda, work( itau ),
2448 $ work( iwork ), lwork-iwork+1, ierr )
2449*
2450* Copy L to WORK(IR), zeroing out above it
2451*
2452 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2453 $ ldwrkr )
2454 CALL slaset( 'U', m-1, m-1, zero, zero,
2455 $ work( ir+ldwrkr ), ldwrkr )
2456*
2457* Generate Q in A
2458* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2459*
2460 CALL sorglq( m, n, m, a, lda, work( itau ),
2461 $ work( iwork ), lwork-iwork+1, ierr )
2462 ie = itau
2463 itauq = ie + m
2464 itaup = itauq + m
2465 iwork = itaup + m
2466*
2467* Bidiagonalize L in WORK(IR)
2468* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2469*
2470 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2471 $ work( ie ), work( itauq ),
2472 $ work( itaup ), work( iwork ),
2473 $ lwork-iwork+1, ierr )
2474*
2475* Generate right vectors bidiagonalizing L in
2476* WORK(IR)
2477* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2478*
2479 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2480 $ work( itaup ), work( iwork ),
2481 $ lwork-iwork+1, ierr )
2482 iwork = ie + m
2483*
2484* Perform bidiagonal QR iteration, computing right
2485* singular vectors of L in WORK(IR)
2486* (Workspace: need M*M+BDSPAC)
2487*
2488 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2489 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2490 $ work( iwork ), info )
2491*
2492* Multiply right singular vectors of L in WORK(IR) by
2493* Q in A, storing result in VT
2494* (Workspace: need M*M)
2495*
2496 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2497 $ ldwrkr, a, lda, zero, vt, ldvt )
2498*
2499 ELSE
2500*
2501* Insufficient workspace for a fast algorithm
2502*
2503 itau = 1
2504 iwork = itau + m
2505*
2506* Compute A=L*Q
2507* (Workspace: need 2*M, prefer M+M*NB)
2508*
2509 CALL sgelqf( m, n, a, lda, work( itau ),
2510 $ work( iwork ), lwork-iwork+1, ierr )
2511*
2512* Copy result to VT
2513*
2514 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2515*
2516* Generate Q in VT
2517* (Workspace: need 2*M, prefer M+M*NB)
2518*
2519 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2520 $ work( iwork ), lwork-iwork+1, ierr )
2521 ie = itau
2522 itauq = ie + m
2523 itaup = itauq + m
2524 iwork = itaup + m
2525*
2526* Zero out above L in A
2527*
2528 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2529 $ lda )
2530*
2531* Bidiagonalize L in A
2532* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2533*
2534 CALL sgebrd( m, m, a, lda, s, work( ie ),
2535 $ work( itauq ), work( itaup ),
2536 $ work( iwork ), lwork-iwork+1, ierr )
2537*
2538* Multiply right vectors bidiagonalizing L by Q in VT
2539* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2540*
2541 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2542 $ work( itaup ), vt, ldvt,
2543 $ work( iwork ), lwork-iwork+1, ierr )
2544 iwork = ie + m
2545*
2546* Perform bidiagonal QR iteration, computing right
2547* singular vectors of A in VT
2548* (Workspace: need BDSPAC)
2549*
2550 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2551 $ ldvt, dum, 1, dum, 1, work( iwork ),
2552 $ info )
2553*
2554 END IF
2555*
2556 ELSE IF( wntuo ) THEN
2557*
2558* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2559* M right singular vectors to be computed in VT and
2560* M left singular vectors to be overwritten on A
2561*
2562 IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2563*
2564* Sufficient workspace for a fast algorithm
2565*
2566 iu = 1
2567 IF( lwork.GE.wrkbl+2*lda*m ) THEN
2568*
2569* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2570*
2571 ldwrku = lda
2572 ir = iu + ldwrku*m
2573 ldwrkr = lda
2574 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2575*
2576* WORK(IU) is LDA by M and WORK(IR) is M by M
2577*
2578 ldwrku = lda
2579 ir = iu + ldwrku*m
2580 ldwrkr = m
2581 ELSE
2582*
2583* WORK(IU) is M by M and WORK(IR) is M by M
2584*
2585 ldwrku = m
2586 ir = iu + ldwrku*m
2587 ldwrkr = m
2588 END IF
2589 itau = ir + ldwrkr*m
2590 iwork = itau + m
2591*
2592* Compute A=L*Q
2593* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2594*
2595 CALL sgelqf( m, n, a, lda, work( itau ),
2596 $ work( iwork ), lwork-iwork+1, ierr )
2597*
2598* Copy L to WORK(IU), zeroing out below it
2599*
2600 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2601 $ ldwrku )
2602 CALL slaset( 'U', m-1, m-1, zero, zero,
2603 $ work( iu+ldwrku ), ldwrku )
2604*
2605* Generate Q in A
2606* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2607*
2608 CALL sorglq( m, n, m, a, lda, work( itau ),
2609 $ work( iwork ), lwork-iwork+1, ierr )
2610 ie = itau
2611 itauq = ie + m
2612 itaup = itauq + m
2613 iwork = itaup + m
2614*
2615* Bidiagonalize L in WORK(IU), copying result to
2616* WORK(IR)
2617* (Workspace: need 2*M*M+4*M,
2618* prefer 2*M*M+3*M+2*M*NB)
2619*
2620 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2621 $ work( ie ), work( itauq ),
2622 $ work( itaup ), work( iwork ),
2623 $ lwork-iwork+1, ierr )
2624 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2625 $ work( ir ), ldwrkr )
2626*
2627* Generate right bidiagonalizing vectors in WORK(IU)
2628* (Workspace: need 2*M*M+4*M-1,
2629* prefer 2*M*M+3*M+(M-1)*NB)
2630*
2631 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2632 $ work( itaup ), work( iwork ),
2633 $ lwork-iwork+1, ierr )
2634*
2635* Generate left bidiagonalizing vectors in WORK(IR)
2636* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2637*
2638 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2639 $ work( itauq ), work( iwork ),
2640 $ lwork-iwork+1, ierr )
2641 iwork = ie + m
2642*
2643* Perform bidiagonal QR iteration, computing left
2644* singular vectors of L in WORK(IR) and computing
2645* right singular vectors of L in WORK(IU)
2646* (Workspace: need 2*M*M+BDSPAC)
2647*
2648 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2649 $ work( iu ), ldwrku, work( ir ),
2650 $ ldwrkr, dum, 1, work( iwork ), info )
2651*
2652* Multiply right singular vectors of L in WORK(IU) by
2653* Q in A, storing result in VT
2654* (Workspace: need M*M)
2655*
2656 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2657 $ ldwrku, a, lda, zero, vt, ldvt )
2658*
2659* Copy left singular vectors of L to A
2660* (Workspace: need M*M)
2661*
2662 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2663 $ lda )
2664*
2665 ELSE
2666*
2667* Insufficient workspace for a fast algorithm
2668*
2669 itau = 1
2670 iwork = itau + m
2671*
2672* Compute A=L*Q, copying result to VT
2673* (Workspace: need 2*M, prefer M+M*NB)
2674*
2675 CALL sgelqf( m, n, a, lda, work( itau ),
2676 $ work( iwork ), lwork-iwork+1, ierr )
2677 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2678*
2679* Generate Q in VT
2680* (Workspace: need 2*M, prefer M+M*NB)
2681*
2682 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2683 $ work( iwork ), lwork-iwork+1, ierr )
2684 ie = itau
2685 itauq = ie + m
2686 itaup = itauq + m
2687 iwork = itaup + m
2688*
2689* Zero out above L in A
2690*
2691 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2692 $ lda )
2693*
2694* Bidiagonalize L in A
2695* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2696*
2697 CALL sgebrd( m, m, a, lda, s, work( ie ),
2698 $ work( itauq ), work( itaup ),
2699 $ work( iwork ), lwork-iwork+1, ierr )
2700*
2701* Multiply right vectors bidiagonalizing L by Q in VT
2702* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2703*
2704 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2705 $ work( itaup ), vt, ldvt,
2706 $ work( iwork ), lwork-iwork+1, ierr )
2707*
2708* Generate left bidiagonalizing vectors of L in A
2709* (Workspace: need 4*M, prefer 3*M+M*NB)
2710*
2711 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2712 $ work( iwork ), lwork-iwork+1, ierr )
2713 iwork = ie + m
2714*
2715* Perform bidiagonal QR iteration, compute left
2716* singular vectors of A in A and compute right
2717* singular vectors of A in VT
2718* (Workspace: need BDSPAC)
2719*
2720 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2721 $ ldvt, a, lda, dum, 1, work( iwork ),
2722 $ info )
2723*
2724 END IF
2725*
2726 ELSE IF( wntuas ) THEN
2727*
2728* Path 6t(N much larger than M, JOBU='S' or 'A',
2729* JOBVT='S')
2730* M right singular vectors to be computed in VT and
2731* M left singular vectors to be computed in U
2732*
2733 IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2734*
2735* Sufficient workspace for a fast algorithm
2736*
2737 iu = 1
2738 IF( lwork.GE.wrkbl+lda*m ) THEN
2739*
2740* WORK(IU) is LDA by N
2741*
2742 ldwrku = lda
2743 ELSE
2744*
2745* WORK(IU) is LDA by M
2746*
2747 ldwrku = m
2748 END IF
2749 itau = iu + ldwrku*m
2750 iwork = itau + m
2751*
2752* Compute A=L*Q
2753* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2754*
2755 CALL sgelqf( m, n, a, lda, work( itau ),
2756 $ work( iwork ), lwork-iwork+1, ierr )
2757*
2758* Copy L to WORK(IU), zeroing out above it
2759*
2760 CALL slacpy( 'L', m, m, a, lda, work( iu ),
2761 $ ldwrku )
2762 CALL slaset( 'U', m-1, m-1, zero, zero,
2763 $ work( iu+ldwrku ), ldwrku )
2764*
2765* Generate Q in A
2766* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2767*
2768 CALL sorglq( m, n, m, a, lda, work( itau ),
2769 $ work( iwork ), lwork-iwork+1, ierr )
2770 ie = itau
2771 itauq = ie + m
2772 itaup = itauq + m
2773 iwork = itaup + m
2774*
2775* Bidiagonalize L in WORK(IU), copying result to U
2776* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2777*
2778 CALL sgebrd( m, m, work( iu ), ldwrku, s,
2779 $ work( ie ), work( itauq ),
2780 $ work( itaup ), work( iwork ),
2781 $ lwork-iwork+1, ierr )
2782 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2783 $ ldu )
2784*
2785* Generate right bidiagonalizing vectors in WORK(IU)
2786* (Workspace: need M*M+4*M-1,
2787* prefer M*M+3*M+(M-1)*NB)
2788*
2789 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2790 $ work( itaup ), work( iwork ),
2791 $ lwork-iwork+1, ierr )
2792*
2793* Generate left bidiagonalizing vectors in U
2794* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2795*
2796 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2797 $ work( iwork ), lwork-iwork+1, ierr )
2798 iwork = ie + m
2799*
2800* Perform bidiagonal QR iteration, computing left
2801* singular vectors of L in U and computing right
2802* singular vectors of L in WORK(IU)
2803* (Workspace: need M*M+BDSPAC)
2804*
2805 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2806 $ work( iu ), ldwrku, u, ldu, dum, 1,
2807 $ work( iwork ), info )
2808*
2809* Multiply right singular vectors of L in WORK(IU) by
2810* Q in A, storing result in VT
2811* (Workspace: need M*M)
2812*
2813 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2814 $ ldwrku, a, lda, zero, vt, ldvt )
2815*
2816 ELSE
2817*
2818* Insufficient workspace for a fast algorithm
2819*
2820 itau = 1
2821 iwork = itau + m
2822*
2823* Compute A=L*Q, copying result to VT
2824* (Workspace: need 2*M, prefer M+M*NB)
2825*
2826 CALL sgelqf( m, n, a, lda, work( itau ),
2827 $ work( iwork ), lwork-iwork+1, ierr )
2828 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2829*
2830* Generate Q in VT
2831* (Workspace: need 2*M, prefer M+M*NB)
2832*
2833 CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2834 $ work( iwork ), lwork-iwork+1, ierr )
2835*
2836* Copy L to U, zeroing out above it
2837*
2838 CALL slacpy( 'L', m, m, a, lda, u, ldu )
2839 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2840 $ ldu )
2841 ie = itau
2842 itauq = ie + m
2843 itaup = itauq + m
2844 iwork = itaup + m
2845*
2846* Bidiagonalize L in U
2847* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2848*
2849 CALL sgebrd( m, m, u, ldu, s, work( ie ),
2850 $ work( itauq ), work( itaup ),
2851 $ work( iwork ), lwork-iwork+1, ierr )
2852*
2853* Multiply right bidiagonalizing vectors in U by Q
2854* in VT
2855* (Workspace: need 3*M+N, prefer 3*M+N*NB)
2856*
2857 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2858 $ work( itaup ), vt, ldvt,
2859 $ work( iwork ), lwork-iwork+1, ierr )
2860*
2861* Generate left bidiagonalizing vectors in U
2862* (Workspace: need 4*M, prefer 3*M+M*NB)
2863*
2864 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2865 $ work( iwork ), lwork-iwork+1, ierr )
2866 iwork = ie + m
2867*
2868* Perform bidiagonal QR iteration, computing left
2869* singular vectors of A in U and computing right
2870* singular vectors of A in VT
2871* (Workspace: need BDSPAC)
2872*
2873 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2874 $ ldvt, u, ldu, dum, 1, work( iwork ),
2875 $ info )
2876*
2877 END IF
2878*
2879 END IF
2880*
2881 ELSE IF( wntva ) THEN
2882*
2883 IF( wntun ) THEN
2884*
2885* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2886* N right singular vectors to be computed in VT and
2887* no left singular vectors to be computed
2888*
2889 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2890*
2891* Sufficient workspace for a fast algorithm
2892*
2893 ir = 1
2894 IF( lwork.GE.wrkbl+lda*m ) THEN
2895*
2896* WORK(IR) is LDA by M
2897*
2898 ldwrkr = lda
2899 ELSE
2900*
2901* WORK(IR) is M by M
2902*
2903 ldwrkr = m
2904 END IF
2905 itau = ir + ldwrkr*m
2906 iwork = itau + m
2907*
2908* Compute A=L*Q, copying result to VT
2909* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2910*
2911 CALL sgelqf( m, n, a, lda, work( itau ),
2912 $ work( iwork ), lwork-iwork+1, ierr )
2913 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2914*
2915* Copy L to WORK(IR), zeroing out above it
2916*
2917 CALL slacpy( 'L', m, m, a, lda, work( ir ),
2918 $ ldwrkr )
2919 CALL slaset( 'U', m-1, m-1, zero, zero,
2920 $ work( ir+ldwrkr ), ldwrkr )
2921*
2922* Generate Q in VT
2923* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2924*
2925 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2926 $ work( iwork ), lwork-iwork+1, ierr )
2927 ie = itau
2928 itauq = ie + m
2929 itaup = itauq + m
2930 iwork = itaup + m
2931*
2932* Bidiagonalize L in WORK(IR)
2933* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2934*
2935 CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2936 $ work( ie ), work( itauq ),
2937 $ work( itaup ), work( iwork ),
2938 $ lwork-iwork+1, ierr )
2939*
2940* Generate right bidiagonalizing vectors in WORK(IR)
2941* (Workspace: need M*M+4*M-1,
2942* prefer M*M+3*M+(M-1)*NB)
2943*
2944 CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2945 $ work( itaup ), work( iwork ),
2946 $ lwork-iwork+1, ierr )
2947 iwork = ie + m
2948*
2949* Perform bidiagonal QR iteration, computing right
2950* singular vectors of L in WORK(IR)
2951* (Workspace: need M*M+BDSPAC)
2952*
2953 CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2954 $ work( ir ), ldwrkr, dum, 1, dum, 1,
2955 $ work( iwork ), info )
2956*
2957* Multiply right singular vectors of L in WORK(IR) by
2958* Q in VT, storing result in A
2959* (Workspace: need M*M)
2960*
2961 CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2962 $ ldwrkr, vt, ldvt, zero, a, lda )
2963*
2964* Copy right singular vectors of A from A to VT
2965*
2966 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2967*
2968 ELSE
2969*
2970* Insufficient workspace for a fast algorithm
2971*
2972 itau = 1
2973 iwork = itau + m
2974*
2975* Compute A=L*Q, copying result to VT
2976* (Workspace: need 2*M, prefer M+M*NB)
2977*
2978 CALL sgelqf( m, n, a, lda, work( itau ),
2979 $ work( iwork ), lwork-iwork+1, ierr )
2980 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2981*
2982* Generate Q in VT
2983* (Workspace: need M+N, prefer M+N*NB)
2984*
2985 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2986 $ work( iwork ), lwork-iwork+1, ierr )
2987 ie = itau
2988 itauq = ie + m
2989 itaup = itauq + m
2990 iwork = itaup + m
2991*
2992* Zero out above L in A
2993*
2994 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2995 $ lda )
2996*
2997* Bidiagonalize L in A
2998* (Workspace: need 4*M, prefer 3*M+2*M*NB)
2999*
3000 CALL sgebrd( m, m, a, lda, s, work( ie ),
3001 $ work( itauq ), work( itaup ),
3002 $ work( iwork ), lwork-iwork+1, ierr )
3003*
3004* Multiply right bidiagonalizing vectors in A by Q
3005* in VT
3006* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3007*
3008 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3009 $ work( itaup ), vt, ldvt,
3010 $ work( iwork ), lwork-iwork+1, ierr )
3011 iwork = ie + m
3012*
3013* Perform bidiagonal QR iteration, computing right
3014* singular vectors of A in VT
3015* (Workspace: need BDSPAC)
3016*
3017 CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3018 $ ldvt, dum, 1, dum, 1, work( iwork ),
3019 $ info )
3020*
3021 END IF
3022*
3023 ELSE IF( wntuo ) THEN
3024*
3025* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3026* N right singular vectors to be computed in VT and
3027* M left singular vectors to be overwritten on A
3028*
3029 IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3030*
3031* Sufficient workspace for a fast algorithm
3032*
3033 iu = 1
3034 IF( lwork.GE.wrkbl+2*lda*m ) THEN
3035*
3036* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3037*
3038 ldwrku = lda
3039 ir = iu + ldwrku*m
3040 ldwrkr = lda
3041 ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3042*
3043* WORK(IU) is LDA by M and WORK(IR) is M by M
3044*
3045 ldwrku = lda
3046 ir = iu + ldwrku*m
3047 ldwrkr = m
3048 ELSE
3049*
3050* WORK(IU) is M by M and WORK(IR) is M by M
3051*
3052 ldwrku = m
3053 ir = iu + ldwrku*m
3054 ldwrkr = m
3055 END IF
3056 itau = ir + ldwrkr*m
3057 iwork = itau + m
3058*
3059* Compute A=L*Q, copying result to VT
3060* (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3061*
3062 CALL sgelqf( m, n, a, lda, work( itau ),
3063 $ work( iwork ), lwork-iwork+1, ierr )
3064 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3065*
3066* Generate Q in VT
3067* (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3068*
3069 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3070 $ work( iwork ), lwork-iwork+1, ierr )
3071*
3072* Copy L to WORK(IU), zeroing out above it
3073*
3074 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3075 $ ldwrku )
3076 CALL slaset( 'U', m-1, m-1, zero, zero,
3077 $ work( iu+ldwrku ), ldwrku )
3078 ie = itau
3079 itauq = ie + m
3080 itaup = itauq + m
3081 iwork = itaup + m
3082*
3083* Bidiagonalize L in WORK(IU), copying result to
3084* WORK(IR)
3085* (Workspace: need 2*M*M+4*M,
3086* prefer 2*M*M+3*M+2*M*NB)
3087*
3088 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3089 $ work( ie ), work( itauq ),
3090 $ work( itaup ), work( iwork ),
3091 $ lwork-iwork+1, ierr )
3092 CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3093 $ work( ir ), ldwrkr )
3094*
3095* Generate right bidiagonalizing vectors in WORK(IU)
3096* (Workspace: need 2*M*M+4*M-1,
3097* prefer 2*M*M+3*M+(M-1)*NB)
3098*
3099 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3100 $ work( itaup ), work( iwork ),
3101 $ lwork-iwork+1, ierr )
3102*
3103* Generate left bidiagonalizing vectors in WORK(IR)
3104* (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3105*
3106 CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3107 $ work( itauq ), work( iwork ),
3108 $ lwork-iwork+1, ierr )
3109 iwork = ie + m
3110*
3111* Perform bidiagonal QR iteration, computing left
3112* singular vectors of L in WORK(IR) and computing
3113* right singular vectors of L in WORK(IU)
3114* (Workspace: need 2*M*M+BDSPAC)
3115*
3116 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3117 $ work( iu ), ldwrku, work( ir ),
3118 $ ldwrkr, dum, 1, work( iwork ), info )
3119*
3120* Multiply right singular vectors of L in WORK(IU) by
3121* Q in VT, storing result in A
3122* (Workspace: need M*M)
3123*
3124 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3125 $ ldwrku, vt, ldvt, zero, a, lda )
3126*
3127* Copy right singular vectors of A from A to VT
3128*
3129 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3130*
3131* Copy left singular vectors of A from WORK(IR) to A
3132*
3133 CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3134 $ lda )
3135*
3136 ELSE
3137*
3138* Insufficient workspace for a fast algorithm
3139*
3140 itau = 1
3141 iwork = itau + m
3142*
3143* Compute A=L*Q, copying result to VT
3144* (Workspace: need 2*M, prefer M+M*NB)
3145*
3146 CALL sgelqf( m, n, a, lda, work( itau ),
3147 $ work( iwork ), lwork-iwork+1, ierr )
3148 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3149*
3150* Generate Q in VT
3151* (Workspace: need M+N, prefer M+N*NB)
3152*
3153 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3154 $ work( iwork ), lwork-iwork+1, ierr )
3155 ie = itau
3156 itauq = ie + m
3157 itaup = itauq + m
3158 iwork = itaup + m
3159*
3160* Zero out above L in A
3161*
3162 CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3163 $ lda )
3164*
3165* Bidiagonalize L in A
3166* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3167*
3168 CALL sgebrd( m, m, a, lda, s, work( ie ),
3169 $ work( itauq ), work( itaup ),
3170 $ work( iwork ), lwork-iwork+1, ierr )
3171*
3172* Multiply right bidiagonalizing vectors in A by Q
3173* in VT
3174* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3175*
3176 CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3177 $ work( itaup ), vt, ldvt,
3178 $ work( iwork ), lwork-iwork+1, ierr )
3179*
3180* Generate left bidiagonalizing vectors in A
3181* (Workspace: need 4*M, prefer 3*M+M*NB)
3182*
3183 CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3184 $ work( iwork ), lwork-iwork+1, ierr )
3185 iwork = ie + m
3186*
3187* Perform bidiagonal QR iteration, computing left
3188* singular vectors of A in A and computing right
3189* singular vectors of A in VT
3190* (Workspace: need BDSPAC)
3191*
3192 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3193 $ ldvt, a, lda, dum, 1, work( iwork ),
3194 $ info )
3195*
3196 END IF
3197*
3198 ELSE IF( wntuas ) THEN
3199*
3200* Path 9t(N much larger than M, JOBU='S' or 'A',
3201* JOBVT='A')
3202* N right singular vectors to be computed in VT and
3203* M left singular vectors to be computed in U
3204*
3205 IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3206*
3207* Sufficient workspace for a fast algorithm
3208*
3209 iu = 1
3210 IF( lwork.GE.wrkbl+lda*m ) THEN
3211*
3212* WORK(IU) is LDA by M
3213*
3214 ldwrku = lda
3215 ELSE
3216*
3217* WORK(IU) is M by M
3218*
3219 ldwrku = m
3220 END IF
3221 itau = iu + ldwrku*m
3222 iwork = itau + m
3223*
3224* Compute A=L*Q, copying result to VT
3225* (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3226*
3227 CALL sgelqf( m, n, a, lda, work( itau ),
3228 $ work( iwork ), lwork-iwork+1, ierr )
3229 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3230*
3231* Generate Q in VT
3232* (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3233*
3234 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3235 $ work( iwork ), lwork-iwork+1, ierr )
3236*
3237* Copy L to WORK(IU), zeroing out above it
3238*
3239 CALL slacpy( 'L', m, m, a, lda, work( iu ),
3240 $ ldwrku )
3241 CALL slaset( 'U', m-1, m-1, zero, zero,
3242 $ work( iu+ldwrku ), ldwrku )
3243 ie = itau
3244 itauq = ie + m
3245 itaup = itauq + m
3246 iwork = itaup + m
3247*
3248* Bidiagonalize L in WORK(IU), copying result to U
3249* (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3250*
3251 CALL sgebrd( m, m, work( iu ), ldwrku, s,
3252 $ work( ie ), work( itauq ),
3253 $ work( itaup ), work( iwork ),
3254 $ lwork-iwork+1, ierr )
3255 CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3256 $ ldu )
3257*
3258* Generate right bidiagonalizing vectors in WORK(IU)
3259* (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3260*
3261 CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3262 $ work( itaup ), work( iwork ),
3263 $ lwork-iwork+1, ierr )
3264*
3265* Generate left bidiagonalizing vectors in U
3266* (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3267*
3268 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3269 $ work( iwork ), lwork-iwork+1, ierr )
3270 iwork = ie + m
3271*
3272* Perform bidiagonal QR iteration, computing left
3273* singular vectors of L in U and computing right
3274* singular vectors of L in WORK(IU)
3275* (Workspace: need M*M+BDSPAC)
3276*
3277 CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3278 $ work( iu ), ldwrku, u, ldu, dum, 1,
3279 $ work( iwork ), info )
3280*
3281* Multiply right singular vectors of L in WORK(IU) by
3282* Q in VT, storing result in A
3283* (Workspace: need M*M)
3284*
3285 CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3286 $ ldwrku, vt, ldvt, zero, a, lda )
3287*
3288* Copy right singular vectors of A from A to VT
3289*
3290 CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3291*
3292 ELSE
3293*
3294* Insufficient workspace for a fast algorithm
3295*
3296 itau = 1
3297 iwork = itau + m
3298*
3299* Compute A=L*Q, copying result to VT
3300* (Workspace: need 2*M, prefer M+M*NB)
3301*
3302 CALL sgelqf( m, n, a, lda, work( itau ),
3303 $ work( iwork ), lwork-iwork+1, ierr )
3304 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3305*
3306* Generate Q in VT
3307* (Workspace: need M+N, prefer M+N*NB)
3308*
3309 CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3310 $ work( iwork ), lwork-iwork+1, ierr )
3311*
3312* Copy L to U, zeroing out above it
3313*
3314 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3315 CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3316 $ ldu )
3317 ie = itau
3318 itauq = ie + m
3319 itaup = itauq + m
3320 iwork = itaup + m
3321*
3322* Bidiagonalize L in U
3323* (Workspace: need 4*M, prefer 3*M+2*M*NB)
3324*
3325 CALL sgebrd( m, m, u, ldu, s, work( ie ),
3326 $ work( itauq ), work( itaup ),
3327 $ work( iwork ), lwork-iwork+1, ierr )
3328*
3329* Multiply right bidiagonalizing vectors in U by Q
3330* in VT
3331* (Workspace: need 3*M+N, prefer 3*M+N*NB)
3332*
3333 CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3334 $ work( itaup ), vt, ldvt,
3335 $ work( iwork ), lwork-iwork+1, ierr )
3336*
3337* Generate left bidiagonalizing vectors in U
3338* (Workspace: need 4*M, prefer 3*M+M*NB)
3339*
3340 CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3341 $ work( iwork ), lwork-iwork+1, ierr )
3342 iwork = ie + m
3343*
3344* Perform bidiagonal QR iteration, computing left
3345* singular vectors of A in U and computing right
3346* singular vectors of A in VT
3347* (Workspace: need BDSPAC)
3348*
3349 CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3350 $ ldvt, u, ldu, dum, 1, work( iwork ),
3351 $ info )
3352*
3353 END IF
3354*
3355 END IF
3356*
3357 END IF
3358*
3359 ELSE
3360*
3361* N .LT. MNTHR
3362*
3363* Path 10t(N greater than M, but not much larger)
3364* Reduce to bidiagonal form without LQ decomposition
3365*
3366 ie = 1
3367 itauq = ie + m
3368 itaup = itauq + m
3369 iwork = itaup + m
3370*
3371* Bidiagonalize A
3372* (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3373*
3374 CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3375 $ work( itaup ), work( iwork ), lwork-iwork+1,
3376 $ ierr )
3377 IF( wntuas ) THEN
3378*
3379* If left singular vectors desired in U, copy result to U
3380* and generate left bidiagonalizing vectors in U
3381* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3382*
3383 CALL slacpy( 'L', m, m, a, lda, u, ldu )
3384 CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3385 $ work( iwork ), lwork-iwork+1, ierr )
3386 END IF
3387 IF( wntvas ) THEN
3388*
3389* If right singular vectors desired in VT, copy result to
3390* VT and generate right bidiagonalizing vectors in VT
3391* (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3392*
3393 CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3394 IF( wntva )
3395 $ nrvt = n
3396 IF( wntvs )
3397 $ nrvt = m
3398 CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3399 $ work( iwork ), lwork-iwork+1, ierr )
3400 END IF
3401 IF( wntuo ) THEN
3402*
3403* If left singular vectors desired in A, generate left
3404* bidiagonalizing vectors in A
3405* (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3406*
3407 CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3408 $ work( iwork ), lwork-iwork+1, ierr )
3409 END IF
3410 IF( wntvo ) THEN
3411*
3412* If right singular vectors desired in A, generate right
3413* bidiagonalizing vectors in A
3414* (Workspace: need 4*M, prefer 3*M+M*NB)
3415*
3416 CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3417 $ work( iwork ), lwork-iwork+1, ierr )
3418 END IF
3419 iwork = ie + m
3420 IF( wntuas .OR. wntuo )
3421 $ nru = m
3422 IF( wntun )
3423 $ nru = 0
3424 IF( wntvas .OR. wntvo )
3425 $ ncvt = n
3426 IF( wntvn )
3427 $ ncvt = 0
3428 IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3429*
3430* Perform bidiagonal QR iteration, if desired, computing
3431* left singular vectors in U and computing right singular
3432* vectors in VT
3433* (Workspace: need BDSPAC)
3434*
3435 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3436 $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3437 ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3438*
3439* Perform bidiagonal QR iteration, if desired, computing
3440* left singular vectors in U and computing right singular
3441* vectors in A
3442* (Workspace: need BDSPAC)
3443*
3444 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3445 $ u, ldu, dum, 1, work( iwork ), info )
3446 ELSE
3447*
3448* Perform bidiagonal QR iteration, if desired, computing
3449* left singular vectors in A and computing right singular
3450* vectors in VT
3451* (Workspace: need BDSPAC)
3452*
3453 CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3454 $ ldvt, a, lda, dum, 1, work( iwork ), info )
3455 END IF
3456*
3457 END IF
3458*
3459 END IF
3460*
3461* If SBDSQR failed to converge, copy unconverged superdiagonals
3462* to WORK( 2:MINMN )
3463*
3464 IF( info.NE.0 ) THEN
3465 IF( ie.GT.2 ) THEN
3466 DO 50 i = 1, minmn - 1
3467 work( i+1 ) = work( i+ie-1 )
3468 50 CONTINUE
3469 END IF
3470 IF( ie.LT.2 ) THEN
3471 DO 60 i = minmn - 1, 1, -1
3472 work( i+1 ) = work( i+ie-1 )
3473 60 CONTINUE
3474 END IF
3475 END IF
3476*
3477* Undo scaling if necessary
3478*
3479 IF( iscl.EQ.1 ) THEN
3480 IF( anrm.GT.bignum )
3481 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3482 $ ierr )
3483 IF( info.NE.0 .AND. anrm.GT.bignum )
3484 $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3485 $ minmn, ierr )
3486 IF( anrm.LT.smlnum )
3487 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3488 $ ierr )
3489 IF( info.NE.0 .AND. anrm.LT.smlnum )
3490 $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3491 $ minmn, ierr )
3492 END IF
3493*
3494* Return optimal workspace in WORK(1)
3495*
3496 work( 1 ) = sroundup_lwork(maxwrk)
3497*
3498 RETURN
3499*
3500* End of SGESVD
3501*
3502 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine sbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work, info)
SBDSQR
Definition sbdsqr.f:240
subroutine sgebrd(m, n, a, lda, d, e, tauq, taup, work, lwork, info)
SGEBRD
Definition sgebrd.f:205
subroutine sgelqf(m, n, a, lda, tau, work, lwork, info)
SGELQF
Definition sgelqf.f:143
subroutine sgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
SGEMM
Definition sgemm.f:188
subroutine sgeqrf(m, n, a, lda, tau, work, lwork, info)
SGEQRF
Definition sgeqrf.f:146
subroutine sgesvd(jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork, info)
SGESVD computes the singular value decomposition (SVD) for GE matrices
Definition sgesvd.f:211
subroutine slacpy(uplo, m, n, a, lda, b, ldb)
SLACPY copies all or part of one two-dimensional array to another.
Definition slacpy.f:103
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:143
subroutine slaset(uplo, m, n, alpha, beta, a, lda)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition slaset.f:110
subroutine sorgbr(vect, m, n, k, a, lda, tau, work, lwork, info)
SORGBR
Definition sorgbr.f:157
subroutine sorglq(m, n, k, a, lda, tau, work, lwork, info)
SORGLQ
Definition sorglq.f:127
subroutine sorgqr(m, n, k, a, lda, tau, work, lwork, info)
SORGQR
Definition sorgqr.f:128
subroutine sormbr(vect, side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
SORMBR
Definition sormbr.f:196