LAPACK  3.6.1
LAPACK: Linear Algebra PACKage
sgesvd.f
Go to the documentation of this file.
1 *> \brief <b> SGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download SGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * REAL A( LDA, * ), S( * ), U( LDU, * ),
30 * $ VT( LDVT, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> SGESVD computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, optionally computing the left and/or right singular
41 *> vectors. The SVD is written
42 *>
43 *> A = U * SIGMA * transpose(V)
44 *>
45 *> where SIGMA is an M-by-N matrix which is zero except for its
46 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48 *> are the singular values of A; they are real and non-negative, and
49 *> are returned in descending order. The first min(m,n) columns of
50 *> U and V are the left and right singular vectors of A.
51 *>
52 *> Note that the routine returns V**T, not V.
53 *> \endverbatim
54 *
55 * Arguments:
56 * ==========
57 *
58 *> \param[in] JOBU
59 *> \verbatim
60 *> JOBU is CHARACTER*1
61 *> Specifies options for computing all or part of the matrix U:
62 *> = 'A': all M columns of U are returned in array U:
63 *> = 'S': the first min(m,n) columns of U (the left singular
64 *> vectors) are returned in the array U;
65 *> = 'O': the first min(m,n) columns of U (the left singular
66 *> vectors) are overwritten on the array A;
67 *> = 'N': no columns of U (no left singular vectors) are
68 *> computed.
69 *> \endverbatim
70 *>
71 *> \param[in] JOBVT
72 *> \verbatim
73 *> JOBVT is CHARACTER*1
74 *> Specifies options for computing all or part of the matrix
75 *> V**T:
76 *> = 'A': all N rows of V**T are returned in the array VT;
77 *> = 'S': the first min(m,n) rows of V**T (the right singular
78 *> vectors) are returned in the array VT;
79 *> = 'O': the first min(m,n) rows of V**T (the right singular
80 *> vectors) are overwritten on the array A;
81 *> = 'N': no rows of V**T (no right singular vectors) are
82 *> computed.
83 *>
84 *> JOBVT and JOBU cannot both be 'O'.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is REAL array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBU = 'O', A is overwritten with the first min(m,n)
105 *> columns of U (the left singular vectors,
106 *> stored columnwise);
107 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108 *> rows of V**T (the right singular vectors,
109 *> stored rowwise);
110 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111 *> are destroyed.
112 *> \endverbatim
113 *>
114 *> \param[in] LDA
115 *> \verbatim
116 *> LDA is INTEGER
117 *> The leading dimension of the array A. LDA >= max(1,M).
118 *> \endverbatim
119 *>
120 *> \param[out] S
121 *> \verbatim
122 *> S is REAL array, dimension (min(M,N))
123 *> The singular values of A, sorted so that S(i) >= S(i+1).
124 *> \endverbatim
125 *>
126 *> \param[out] U
127 *> \verbatim
128 *> U is REAL array, dimension (LDU,UCOL)
129 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130 *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131 *> if JOBU = 'S', U contains the first min(m,n) columns of U
132 *> (the left singular vectors, stored columnwise);
133 *> if JOBU = 'N' or 'O', U is not referenced.
134 *> \endverbatim
135 *>
136 *> \param[in] LDU
137 *> \verbatim
138 *> LDU is INTEGER
139 *> The leading dimension of the array U. LDU >= 1; if
140 *> JOBU = 'S' or 'A', LDU >= M.
141 *> \endverbatim
142 *>
143 *> \param[out] VT
144 *> \verbatim
145 *> VT is REAL array, dimension (LDVT,N)
146 *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147 *> V**T;
148 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149 *> V**T (the right singular vectors, stored rowwise);
150 *> if JOBVT = 'N' or 'O', VT is not referenced.
151 *> \endverbatim
152 *>
153 *> \param[in] LDVT
154 *> \verbatim
155 *> LDVT is INTEGER
156 *> The leading dimension of the array VT. LDVT >= 1; if
157 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158 *> \endverbatim
159 *>
160 *> \param[out] WORK
161 *> \verbatim
162 *> WORK is REAL array, dimension (MAX(1,LWORK))
163 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164 *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165 *> superdiagonal elements of an upper bidiagonal matrix B
166 *> whose diagonal is in S (not necessarily sorted). B
167 *> satisfies A = U * B * VT, so it has the same singular values
168 *> as A, and singular vectors related by U and VT.
169 *> \endverbatim
170 *>
171 *> \param[in] LWORK
172 *> \verbatim
173 *> LWORK is INTEGER
174 *> The dimension of the array WORK.
175 *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176 *> - PATH 1 (M much larger than N, JOBU='N')
177 *> - PATH 1t (N much larger than M, JOBVT='N')
178 *> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179 *> For good performance, LWORK should generally be larger.
180 *>
181 *> If LWORK = -1, then a workspace query is assumed; the routine
182 *> only calculates the optimal size of the WORK array, returns
183 *> this value as the first entry of the WORK array, and no error
184 *> message related to LWORK is issued by XERBLA.
185 *> \endverbatim
186 *>
187 *> \param[out] INFO
188 *> \verbatim
189 *> INFO is INTEGER
190 *> = 0: successful exit.
191 *> < 0: if INFO = -i, the i-th argument had an illegal value.
192 *> > 0: if SBDSQR did not converge, INFO specifies how many
193 *> superdiagonals of an intermediate bidiagonal form B
194 *> did not converge to zero. See the description of WORK
195 *> above for details.
196 *> \endverbatim
197 *
198 * Authors:
199 * ========
200 *
201 *> \author Univ. of Tennessee
202 *> \author Univ. of California Berkeley
203 *> \author Univ. of Colorado Denver
204 *> \author NAG Ltd.
205 *
206 *> \date April 2012
207 *
208 *> \ingroup realGEsing
209 *
210 * =====================================================================
211  SUBROUTINE sgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
212  $ work, lwork, info )
213 *
214 * -- LAPACK driver routine (version 3.6.1) --
215 * -- LAPACK is a software package provided by Univ. of Tennessee, --
216 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217 * April 2012
218 *
219 * .. Scalar Arguments ..
220  CHARACTER JOBU, JOBVT
221  INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
222 * ..
223 * .. Array Arguments ..
224  REAL A( lda, * ), S( * ), U( ldu, * ),
225  $ vt( ldvt, * ), work( * )
226 * ..
227 *
228 * =====================================================================
229 *
230 * .. Parameters ..
231  REAL ZERO, ONE
232  parameter ( zero = 0.0e0, one = 1.0e0 )
233 * ..
234 * .. Local Scalars ..
235  LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
236  $ wntva, wntvas, wntvn, wntvo, wntvs
237  INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
238  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
239  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
240  $ nrvt, wrkbl
241  INTEGER LWORK_SGEQRF, LWORK_SORGQR_N, LWORK_SORGQR_M,
242  $ lwork_sgebrd, lwork_sorgbr_p, lwork_sorgbr_q,
243  $ lwork_sgelqf, lwork_sorglq_n, lwork_sorglq_m
244  REAL ANRM, BIGNUM, EPS, SMLNUM
245 * ..
246 * .. Local Arrays ..
247  REAL DUM( 1 )
248 * ..
249 * .. External Subroutines ..
250  EXTERNAL sbdsqr, sgebrd, sgelqf, sgemm, sgeqrf, slacpy,
252  $ xerbla
253 * ..
254 * .. External Functions ..
255  LOGICAL LSAME
256  INTEGER ILAENV
257  REAL SLAMCH, SLANGE
258  EXTERNAL lsame, ilaenv, slamch, slange
259 * ..
260 * .. Intrinsic Functions ..
261  INTRINSIC max, min, sqrt
262 * ..
263 * .. Executable Statements ..
264 *
265 * Test the input arguments
266 *
267  info = 0
268  minmn = min( m, n )
269  wntua = lsame( jobu, 'A' )
270  wntus = lsame( jobu, 'S' )
271  wntuas = wntua .OR. wntus
272  wntuo = lsame( jobu, 'O' )
273  wntun = lsame( jobu, 'N' )
274  wntva = lsame( jobvt, 'A' )
275  wntvs = lsame( jobvt, 'S' )
276  wntvas = wntva .OR. wntvs
277  wntvo = lsame( jobvt, 'O' )
278  wntvn = lsame( jobvt, 'N' )
279  lquery = ( lwork.EQ.-1 )
280 *
281  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
282  info = -1
283  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
284  $ ( wntvo .AND. wntuo ) ) THEN
285  info = -2
286  ELSE IF( m.LT.0 ) THEN
287  info = -3
288  ELSE IF( n.LT.0 ) THEN
289  info = -4
290  ELSE IF( lda.LT.max( 1, m ) ) THEN
291  info = -6
292  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
293  info = -9
294  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
295  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
296  info = -11
297  END IF
298 *
299 * Compute workspace
300 * (Note: Comments in the code beginning "Workspace:" describe the
301 * minimal amount of workspace needed at that point in the code,
302 * as well as the preferred amount for good performance.
303 * NB refers to the optimal block size for the immediately
304 * following subroutine, as returned by ILAENV.)
305 *
306  IF( info.EQ.0 ) THEN
307  minwrk = 1
308  maxwrk = 1
309  IF( m.GE.n .AND. minmn.GT.0 ) THEN
310 *
311 * Compute space needed for SBDSQR
312 *
313  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
314  bdspac = 5*n
315 * Compute space needed for SGEQRF
316  CALL sgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_sgeqrf = int( dum(1) )
318 * Compute space needed for SORGQR
319  CALL sorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
320  lwork_sorgqr_n = int( dum(1) )
321  CALL sorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
322  lwork_sorgqr_m = int( dum(1) )
323 * Compute space needed for SGEBRD
324  CALL sgebrd( n, n, a, lda, s, dum(1), dum(1),
325  $ dum(1), dum(1), -1, ierr )
326  lwork_sgebrd = int( dum(1) )
327 * Compute space needed for SORGBR P
328  CALL sorgbr( 'P', n, n, n, a, lda, dum(1),
329  $ dum(1), -1, ierr )
330  lwork_sorgbr_p = int( dum(1) )
331 * Compute space needed for SORGBR Q
332  CALL sorgbr( 'Q', n, n, n, a, lda, dum(1),
333  $ dum(1), -1, ierr )
334  lwork_sorgbr_q = int( dum(1) )
335 *
336  IF( m.GE.mnthr ) THEN
337  IF( wntun ) THEN
338 *
339 * Path 1 (M much larger than N, JOBU='N')
340 *
341  maxwrk = n + lwork_sgeqrf
342  maxwrk = max( maxwrk, 3*n+lwork_sgebrd )
343  IF( wntvo .OR. wntvas )
344  $ maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
345  maxwrk = max( maxwrk, bdspac )
346  minwrk = max( 4*n, bdspac )
347  ELSE IF( wntuo .AND. wntvn ) THEN
348 *
349 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350 *
351  wrkbl = n + lwork_sgeqrf
352  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
353  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
354  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
355  wrkbl = max( wrkbl, bdspac )
356  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
357  minwrk = max( 3*n+m, bdspac )
358  ELSE IF( wntuo .AND. wntvas ) THEN
359 *
360 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361 * 'A')
362 *
363  wrkbl = n + lwork_sgeqrf
364  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
365  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
366  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
367  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
368  wrkbl = max( wrkbl, bdspac )
369  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
370  minwrk = max( 3*n+m, bdspac )
371  ELSE IF( wntus .AND. wntvn ) THEN
372 *
373 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374 *
375  wrkbl = n + lwork_sgeqrf
376  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
377  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
378  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
379  wrkbl = max( wrkbl, bdspac )
380  maxwrk = n*n + wrkbl
381  minwrk = max( 3*n+m, bdspac )
382  ELSE IF( wntus .AND. wntvo ) THEN
383 *
384 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385 *
386  wrkbl = n + lwork_sgeqrf
387  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
388  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
389  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
390  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
391  wrkbl = max( wrkbl, bdspac )
392  maxwrk = 2*n*n + wrkbl
393  minwrk = max( 3*n+m, bdspac )
394  ELSE IF( wntus .AND. wntvas ) THEN
395 *
396 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397 * 'A')
398 *
399  wrkbl = n + lwork_sgeqrf
400  wrkbl = max( wrkbl, n+lwork_sorgqr_n )
401  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
402  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
403  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
404  wrkbl = max( wrkbl, bdspac )
405  maxwrk = n*n + wrkbl
406  minwrk = max( 3*n+m, bdspac )
407  ELSE IF( wntua .AND. wntvn ) THEN
408 *
409 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410 *
411  wrkbl = n + lwork_sgeqrf
412  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
413  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
414  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
415  wrkbl = max( wrkbl, bdspac )
416  maxwrk = n*n + wrkbl
417  minwrk = max( 3*n+m, bdspac )
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_sgeqrf
423  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
424  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
425  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
426  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
427  wrkbl = max( wrkbl, bdspac )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = max( 3*n+m, bdspac )
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_sgeqrf
436  wrkbl = max( wrkbl, n+lwork_sorgqr_m )
437  wrkbl = max( wrkbl, 3*n+lwork_sgebrd )
438  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_q )
439  wrkbl = max( wrkbl, 3*n+lwork_sorgbr_p )
440  wrkbl = max( wrkbl, bdspac )
441  maxwrk = n*n + wrkbl
442  minwrk = max( 3*n+m, bdspac )
443  END IF
444  ELSE
445 *
446 * Path 10 (M at least N, but not much larger)
447 *
448  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
449  $ dum(1), dum(1), -1, ierr )
450  lwork_sgebrd = int( dum(1) )
451  maxwrk = 3*n + lwork_sgebrd
452  IF( wntus .OR. wntuo ) THEN
453  CALL sorgbr( 'Q', m, n, n, a, lda, dum(1),
454  $ dum(1), -1, ierr )
455  lwork_sorgbr_q = int( dum(1) )
456  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
457  END IF
458  IF( wntua ) THEN
459  CALL sorgbr( 'Q', m, m, n, a, lda, dum(1),
460  $ dum(1), -1, ierr )
461  lwork_sorgbr_q = int( dum(1) )
462  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_q )
463  END IF
464  IF( .NOT.wntvn ) THEN
465  maxwrk = max( maxwrk, 3*n+lwork_sorgbr_p )
466  END IF
467  maxwrk = max( maxwrk, bdspac )
468  minwrk = max( 3*n+m, bdspac )
469  END IF
470  ELSE IF( minmn.GT.0 ) THEN
471 *
472 * Compute space needed for SBDSQR
473 *
474  mnthr = ilaenv( 6, 'SGESVD', jobu // jobvt, m, n, 0, 0 )
475  bdspac = 5*m
476 * Compute space needed for SGELQF
477  CALL sgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
478  lwork_sgelqf = int( dum(1) )
479 * Compute space needed for SORGLQ
480  CALL sorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
481  lwork_sorglq_n = int( dum(1) )
482  CALL sorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
483  lwork_sorglq_m = int( dum(1) )
484 * Compute space needed for SGEBRD
485  CALL sgebrd( m, m, a, lda, s, dum(1), dum(1),
486  $ dum(1), dum(1), -1, ierr )
487  lwork_sgebrd = int( dum(1) )
488 * Compute space needed for SORGBR P
489  CALL sorgbr( 'P', m, m, m, a, n, dum(1),
490  $ dum(1), -1, ierr )
491  lwork_sorgbr_p = int( dum(1) )
492 * Compute space needed for SORGBR Q
493  CALL sorgbr( 'Q', m, m, m, a, n, dum(1),
494  $ dum(1), -1, ierr )
495  lwork_sorgbr_q = int( dum(1) )
496  IF( n.GE.mnthr ) THEN
497  IF( wntvn ) THEN
498 *
499 * Path 1t(N much larger than M, JOBVT='N')
500 *
501  maxwrk = m + lwork_sgelqf
502  maxwrk = max( maxwrk, 3*m+lwork_sgebrd )
503  IF( wntuo .OR. wntuas )
504  $ maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
505  maxwrk = max( maxwrk, bdspac )
506  minwrk = max( 4*m, bdspac )
507  ELSE IF( wntvo .AND. wntun ) THEN
508 *
509 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510 *
511  wrkbl = m + lwork_sgelqf
512  wrkbl = max( wrkbl, m+lwork_sorglq_m )
513  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
514  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
515  wrkbl = max( wrkbl, bdspac )
516  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
517  minwrk = max( 3*m+n, bdspac )
518  ELSE IF( wntvo .AND. wntuas ) THEN
519 *
520 * Path 3t(N much larger than M, JOBU='S' or 'A',
521 * JOBVT='O')
522 *
523  wrkbl = m + lwork_sgelqf
524  wrkbl = max( wrkbl, m+lwork_sorglq_m )
525  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
526  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
527  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
528  wrkbl = max( wrkbl, bdspac )
529  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
530  minwrk = max( 3*m+n, bdspac )
531  ELSE IF( wntvs .AND. wntun ) THEN
532 *
533 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534 *
535  wrkbl = m + lwork_sgelqf
536  wrkbl = max( wrkbl, m+lwork_sorglq_m )
537  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
538  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
539  wrkbl = max( wrkbl, bdspac )
540  maxwrk = m*m + wrkbl
541  minwrk = max( 3*m+n, bdspac )
542  ELSE IF( wntvs .AND. wntuo ) THEN
543 *
544 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545 *
546  wrkbl = m + lwork_sgelqf
547  wrkbl = max( wrkbl, m+lwork_sorglq_m )
548  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
549  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
550  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
551  wrkbl = max( wrkbl, bdspac )
552  maxwrk = 2*m*m + wrkbl
553  minwrk = max( 3*m+n, bdspac )
554  maxwrk = max( maxwrk, minwrk )
555  ELSE IF( wntvs .AND. wntuas ) THEN
556 *
557 * Path 6t(N much larger than M, JOBU='S' or 'A',
558 * JOBVT='S')
559 *
560  wrkbl = m + lwork_sgelqf
561  wrkbl = max( wrkbl, m+lwork_sorglq_m )
562  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
563  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
564  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
565  wrkbl = max( wrkbl, bdspac )
566  maxwrk = m*m + wrkbl
567  minwrk = max( 3*m+n, bdspac )
568  ELSE IF( wntva .AND. wntun ) THEN
569 *
570 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
571 *
572  wrkbl = m + lwork_sgelqf
573  wrkbl = max( wrkbl, m+lwork_sorglq_n )
574  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
575  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
576  wrkbl = max( wrkbl, bdspac )
577  maxwrk = m*m + wrkbl
578  minwrk = max( 3*m+n, bdspac )
579  ELSE IF( wntva .AND. wntuo ) THEN
580 *
581 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
582 *
583  wrkbl = m + lwork_sgelqf
584  wrkbl = max( wrkbl, m+lwork_sorglq_n )
585  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
586  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
587  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
588  wrkbl = max( wrkbl, bdspac )
589  maxwrk = 2*m*m + wrkbl
590  minwrk = max( 3*m+n, bdspac )
591  ELSE IF( wntva .AND. wntuas ) THEN
592 *
593 * Path 9t(N much larger than M, JOBU='S' or 'A',
594 * JOBVT='A')
595 *
596  wrkbl = m + lwork_sgelqf
597  wrkbl = max( wrkbl, m+lwork_sorglq_n )
598  wrkbl = max( wrkbl, 3*m+lwork_sgebrd )
599  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_p )
600  wrkbl = max( wrkbl, 3*m+lwork_sorgbr_q )
601  wrkbl = max( wrkbl, bdspac )
602  maxwrk = m*m + wrkbl
603  minwrk = max( 3*m+n, bdspac )
604  END IF
605  ELSE
606 *
607 * Path 10t(N greater than M, but not much larger)
608 *
609  CALL sgebrd( m, n, a, lda, s, dum(1), dum(1),
610  $ dum(1), dum(1), -1, ierr )
611  lwork_sgebrd = int( dum(1) )
612  maxwrk = 3*m + lwork_sgebrd
613  IF( wntvs .OR. wntvo ) THEN
614 * Compute space needed for SORGBR P
615  CALL sorgbr( 'P', m, n, m, a, n, dum(1),
616  $ dum(1), -1, ierr )
617  lwork_sorgbr_p = int( dum(1) )
618  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
619  END IF
620  IF( wntva ) THEN
621  CALL sorgbr( 'P', n, n, m, a, n, dum(1),
622  $ dum(1), -1, ierr )
623  lwork_sorgbr_p = int( dum(1) )
624  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_p )
625  END IF
626  IF( .NOT.wntun ) THEN
627  maxwrk = max( maxwrk, 3*m+lwork_sorgbr_q )
628  END IF
629  maxwrk = max( maxwrk, bdspac )
630  minwrk = max( 3*m+n, bdspac )
631  END IF
632  END IF
633  maxwrk = max( maxwrk, minwrk )
634  work( 1 ) = maxwrk
635 *
636  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
637  info = -13
638  END IF
639  END IF
640 *
641  IF( info.NE.0 ) THEN
642  CALL xerbla( 'SGESVD', -info )
643  RETURN
644  ELSE IF( lquery ) THEN
645  RETURN
646  END IF
647 *
648 * Quick return if possible
649 *
650  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
651  RETURN
652  END IF
653 *
654 * Get machine constants
655 *
656  eps = slamch( 'P' )
657  smlnum = sqrt( slamch( 'S' ) ) / eps
658  bignum = one / smlnum
659 *
660 * Scale A if max element outside range [SMLNUM,BIGNUM]
661 *
662  anrm = slange( 'M', m, n, a, lda, dum )
663  iscl = 0
664  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
665  iscl = 1
666  CALL slascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
667  ELSE IF( anrm.GT.bignum ) THEN
668  iscl = 1
669  CALL slascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
670  END IF
671 *
672  IF( m.GE.n ) THEN
673 *
674 * A has at least as many rows as columns. If A has sufficiently
675 * more rows than columns, first reduce using the QR
676 * decomposition (if sufficient workspace available)
677 *
678  IF( m.GE.mnthr ) THEN
679 *
680  IF( wntun ) THEN
681 *
682 * Path 1 (M much larger than N, JOBU='N')
683 * No left singular vectors to be computed
684 *
685  itau = 1
686  iwork = itau + n
687 *
688 * Compute A=Q*R
689 * (Workspace: need 2*N, prefer N+N*NB)
690 *
691  CALL sgeqrf( m, n, a, lda, work( itau ), work( iwork ),
692  $ lwork-iwork+1, ierr )
693 *
694 * Zero out below R
695 *
696  IF( n .GT. 1 ) THEN
697  CALL slaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
698  $ lda )
699  END IF
700  ie = 1
701  itauq = ie + n
702  itaup = itauq + n
703  iwork = itaup + n
704 *
705 * Bidiagonalize R in A
706 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
707 *
708  CALL sgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
709  $ work( itaup ), work( iwork ), lwork-iwork+1,
710  $ ierr )
711  ncvt = 0
712  IF( wntvo .OR. wntvas ) THEN
713 *
714 * If right singular vectors desired, generate P'.
715 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
716 *
717  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
718  $ work( iwork ), lwork-iwork+1, ierr )
719  ncvt = n
720  END IF
721  iwork = ie + n
722 *
723 * Perform bidiagonal QR iteration, computing right
724 * singular vectors of A in A if desired
725 * (Workspace: need BDSPAC)
726 *
727  CALL sbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
728  $ dum, 1, dum, 1, work( iwork ), info )
729 *
730 * If right singular vectors desired in VT, copy them there
731 *
732  IF( wntvas )
733  $ CALL slacpy( 'F', n, n, a, lda, vt, ldvt )
734 *
735  ELSE IF( wntuo .AND. wntvn ) THEN
736 *
737 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
738 * N left singular vectors to be overwritten on A and
739 * no right singular vectors to be computed
740 *
741  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
742 *
743 * Sufficient workspace for a fast algorithm
744 *
745  ir = 1
746  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
747 *
748 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
749 *
750  ldwrku = lda
751  ldwrkr = lda
752  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
753 *
754 * WORK(IU) is LDA by N, WORK(IR) is N by N
755 *
756  ldwrku = lda
757  ldwrkr = n
758  ELSE
759 *
760 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
761 *
762  ldwrku = ( lwork-n*n-n ) / n
763  ldwrkr = n
764  END IF
765  itau = ir + ldwrkr*n
766  iwork = itau + n
767 *
768 * Compute A=Q*R
769 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
770 *
771  CALL sgeqrf( m, n, a, lda, work( itau ),
772  $ work( iwork ), lwork-iwork+1, ierr )
773 *
774 * Copy R to WORK(IR) and zero out below it
775 *
776  CALL slacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
777  CALL slaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
778  $ ldwrkr )
779 *
780 * Generate Q in A
781 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
782 *
783  CALL sorgqr( m, n, n, a, lda, work( itau ),
784  $ work( iwork ), lwork-iwork+1, ierr )
785  ie = itau
786  itauq = ie + n
787  itaup = itauq + n
788  iwork = itaup + n
789 *
790 * Bidiagonalize R in WORK(IR)
791 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
792 *
793  CALL sgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
794  $ work( itauq ), work( itaup ),
795  $ work( iwork ), lwork-iwork+1, ierr )
796 *
797 * Generate left vectors bidiagonalizing R
798 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
799 *
800  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
801  $ work( itauq ), work( iwork ),
802  $ lwork-iwork+1, ierr )
803  iwork = ie + n
804 *
805 * Perform bidiagonal QR iteration, computing left
806 * singular vectors of R in WORK(IR)
807 * (Workspace: need N*N+BDSPAC)
808 *
809  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
810  $ work( ir ), ldwrkr, dum, 1,
811  $ work( iwork ), info )
812  iu = ie + n
813 *
814 * Multiply Q in A by left singular vectors of R in
815 * WORK(IR), storing result in WORK(IU) and copying to A
816 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
817 *
818  DO 10 i = 1, m, ldwrku
819  chunk = min( m-i+1, ldwrku )
820  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
821  $ lda, work( ir ), ldwrkr, zero,
822  $ work( iu ), ldwrku )
823  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
824  $ a( i, 1 ), lda )
825  10 CONTINUE
826 *
827  ELSE
828 *
829 * Insufficient workspace for a fast algorithm
830 *
831  ie = 1
832  itauq = ie + n
833  itaup = itauq + n
834  iwork = itaup + n
835 *
836 * Bidiagonalize A
837 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
838 *
839  CALL sgebrd( m, n, a, lda, s, work( ie ),
840  $ work( itauq ), work( itaup ),
841  $ work( iwork ), lwork-iwork+1, ierr )
842 *
843 * Generate left vectors bidiagonalizing A
844 * (Workspace: need 4*N, prefer 3*N+N*NB)
845 *
846  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
847  $ work( iwork ), lwork-iwork+1, ierr )
848  iwork = ie + n
849 *
850 * Perform bidiagonal QR iteration, computing left
851 * singular vectors of A in A
852 * (Workspace: need BDSPAC)
853 *
854  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
855  $ a, lda, dum, 1, work( iwork ), info )
856 *
857  END IF
858 *
859  ELSE IF( wntuo .AND. wntvas ) THEN
860 *
861 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
862 * N left singular vectors to be overwritten on A and
863 * N right singular vectors to be computed in VT
864 *
865  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
866 *
867 * Sufficient workspace for a fast algorithm
868 *
869  ir = 1
870  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
871 *
872 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
873 *
874  ldwrku = lda
875  ldwrkr = lda
876  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
877 *
878 * WORK(IU) is LDA by N and WORK(IR) is N by N
879 *
880  ldwrku = lda
881  ldwrkr = n
882  ELSE
883 *
884 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
885 *
886  ldwrku = ( lwork-n*n-n ) / n
887  ldwrkr = n
888  END IF
889  itau = ir + ldwrkr*n
890  iwork = itau + n
891 *
892 * Compute A=Q*R
893 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
894 *
895  CALL sgeqrf( m, n, a, lda, work( itau ),
896  $ work( iwork ), lwork-iwork+1, ierr )
897 *
898 * Copy R to VT, zeroing out below it
899 *
900  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
901  IF( n.GT.1 )
902  $ CALL slaset( 'L', n-1, n-1, zero, zero,
903  $ vt( 2, 1 ), ldvt )
904 *
905 * Generate Q in A
906 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
907 *
908  CALL sorgqr( m, n, n, a, lda, work( itau ),
909  $ work( iwork ), lwork-iwork+1, ierr )
910  ie = itau
911  itauq = ie + n
912  itaup = itauq + n
913  iwork = itaup + n
914 *
915 * Bidiagonalize R in VT, copying result to WORK(IR)
916 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
917 *
918  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
919  $ work( itauq ), work( itaup ),
920  $ work( iwork ), lwork-iwork+1, ierr )
921  CALL slacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
922 *
923 * Generate left vectors bidiagonalizing R in WORK(IR)
924 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
925 *
926  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
927  $ work( itauq ), work( iwork ),
928  $ lwork-iwork+1, ierr )
929 *
930 * Generate right vectors bidiagonalizing R in VT
931 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
932 *
933  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
934  $ work( iwork ), lwork-iwork+1, ierr )
935  iwork = ie + n
936 *
937 * Perform bidiagonal QR iteration, computing left
938 * singular vectors of R in WORK(IR) and computing right
939 * singular vectors of R in VT
940 * (Workspace: need N*N+BDSPAC)
941 *
942  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
943  $ work( ir ), ldwrkr, dum, 1,
944  $ work( iwork ), info )
945  iu = ie + n
946 *
947 * Multiply Q in A by left singular vectors of R in
948 * WORK(IR), storing result in WORK(IU) and copying to A
949 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
950 *
951  DO 20 i = 1, m, ldwrku
952  chunk = min( m-i+1, ldwrku )
953  CALL sgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
954  $ lda, work( ir ), ldwrkr, zero,
955  $ work( iu ), ldwrku )
956  CALL slacpy( 'F', chunk, n, work( iu ), ldwrku,
957  $ a( i, 1 ), lda )
958  20 CONTINUE
959 *
960  ELSE
961 *
962 * Insufficient workspace for a fast algorithm
963 *
964  itau = 1
965  iwork = itau + n
966 *
967 * Compute A=Q*R
968 * (Workspace: need 2*N, prefer N+N*NB)
969 *
970  CALL sgeqrf( m, n, a, lda, work( itau ),
971  $ work( iwork ), lwork-iwork+1, ierr )
972 *
973 * Copy R to VT, zeroing out below it
974 *
975  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
976  IF( n.GT.1 )
977  $ CALL slaset( 'L', n-1, n-1, zero, zero,
978  $ vt( 2, 1 ), ldvt )
979 *
980 * Generate Q in A
981 * (Workspace: need 2*N, prefer N+N*NB)
982 *
983  CALL sorgqr( m, n, n, a, lda, work( itau ),
984  $ work( iwork ), lwork-iwork+1, ierr )
985  ie = itau
986  itauq = ie + n
987  itaup = itauq + n
988  iwork = itaup + n
989 *
990 * Bidiagonalize R in VT
991 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
992 *
993  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
994  $ work( itauq ), work( itaup ),
995  $ work( iwork ), lwork-iwork+1, ierr )
996 *
997 * Multiply Q in A by left vectors bidiagonalizing R
998 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
999 *
1000  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1001  $ work( itauq ), a, lda, work( iwork ),
1002  $ lwork-iwork+1, ierr )
1003 *
1004 * Generate right vectors bidiagonalizing R in VT
1005 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1006 *
1007  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1008  $ work( iwork ), lwork-iwork+1, ierr )
1009  iwork = ie + n
1010 *
1011 * Perform bidiagonal QR iteration, computing left
1012 * singular vectors of A in A and computing right
1013 * singular vectors of A in VT
1014 * (Workspace: need BDSPAC)
1015 *
1016  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1017  $ a, lda, dum, 1, work( iwork ), info )
1018 *
1019  END IF
1020 *
1021  ELSE IF( wntus ) THEN
1022 *
1023  IF( wntvn ) THEN
1024 *
1025 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1026 * N left singular vectors to be computed in U and
1027 * no right singular vectors to be computed
1028 *
1029  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1030 *
1031 * Sufficient workspace for a fast algorithm
1032 *
1033  ir = 1
1034  IF( lwork.GE.wrkbl+lda*n ) THEN
1035 *
1036 * WORK(IR) is LDA by N
1037 *
1038  ldwrkr = lda
1039  ELSE
1040 *
1041 * WORK(IR) is N by N
1042 *
1043  ldwrkr = n
1044  END IF
1045  itau = ir + ldwrkr*n
1046  iwork = itau + n
1047 *
1048 * Compute A=Q*R
1049 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1050 *
1051  CALL sgeqrf( m, n, a, lda, work( itau ),
1052  $ work( iwork ), lwork-iwork+1, ierr )
1053 *
1054 * Copy R to WORK(IR), zeroing out below it
1055 *
1056  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1057  $ ldwrkr )
1058  CALL slaset( 'L', n-1, n-1, zero, zero,
1059  $ work( ir+1 ), ldwrkr )
1060 *
1061 * Generate Q in A
1062 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1063 *
1064  CALL sorgqr( m, n, n, a, lda, work( itau ),
1065  $ work( iwork ), lwork-iwork+1, ierr )
1066  ie = itau
1067  itauq = ie + n
1068  itaup = itauq + n
1069  iwork = itaup + n
1070 *
1071 * Bidiagonalize R in WORK(IR)
1072 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1073 *
1074  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1075  $ work( ie ), work( itauq ),
1076  $ work( itaup ), work( iwork ),
1077  $ lwork-iwork+1, ierr )
1078 *
1079 * Generate left vectors bidiagonalizing R in WORK(IR)
1080 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1081 *
1082  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1083  $ work( itauq ), work( iwork ),
1084  $ lwork-iwork+1, ierr )
1085  iwork = ie + n
1086 *
1087 * Perform bidiagonal QR iteration, computing left
1088 * singular vectors of R in WORK(IR)
1089 * (Workspace: need N*N+BDSPAC)
1090 *
1091  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1092  $ 1, work( ir ), ldwrkr, dum, 1,
1093  $ work( iwork ), info )
1094 *
1095 * Multiply Q in A by left singular vectors of R in
1096 * WORK(IR), storing result in U
1097 * (Workspace: need N*N)
1098 *
1099  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1100  $ work( ir ), ldwrkr, zero, u, ldu )
1101 *
1102  ELSE
1103 *
1104 * Insufficient workspace for a fast algorithm
1105 *
1106  itau = 1
1107  iwork = itau + n
1108 *
1109 * Compute A=Q*R, copying result to U
1110 * (Workspace: need 2*N, prefer N+N*NB)
1111 *
1112  CALL sgeqrf( m, n, a, lda, work( itau ),
1113  $ work( iwork ), lwork-iwork+1, ierr )
1114  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1115 *
1116 * Generate Q in U
1117 * (Workspace: need 2*N, prefer N+N*NB)
1118 *
1119  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1120  $ work( iwork ), lwork-iwork+1, ierr )
1121  ie = itau
1122  itauq = ie + n
1123  itaup = itauq + n
1124  iwork = itaup + n
1125 *
1126 * Zero out below R in A
1127 *
1128  IF( n .GT. 1 ) THEN
1129  CALL slaset( 'L', n-1, n-1, zero, zero,
1130  $ a( 2, 1 ), lda )
1131  END IF
1132 *
1133 * Bidiagonalize R in A
1134 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1135 *
1136  CALL sgebrd( n, n, a, lda, s, work( ie ),
1137  $ work( itauq ), work( itaup ),
1138  $ work( iwork ), lwork-iwork+1, ierr )
1139 *
1140 * Multiply Q in U by left vectors bidiagonalizing R
1141 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1142 *
1143  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1144  $ work( itauq ), u, ldu, work( iwork ),
1145  $ lwork-iwork+1, ierr )
1146  iwork = ie + n
1147 *
1148 * Perform bidiagonal QR iteration, computing left
1149 * singular vectors of A in U
1150 * (Workspace: need BDSPAC)
1151 *
1152  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1153  $ 1, u, ldu, dum, 1, work( iwork ),
1154  $ info )
1155 *
1156  END IF
1157 *
1158  ELSE IF( wntvo ) THEN
1159 *
1160 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1161 * N left singular vectors to be computed in U and
1162 * N right singular vectors to be overwritten on A
1163 *
1164  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1165 *
1166 * Sufficient workspace for a fast algorithm
1167 *
1168  iu = 1
1169  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1170 *
1171 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1172 *
1173  ldwrku = lda
1174  ir = iu + ldwrku*n
1175  ldwrkr = lda
1176  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1177 *
1178 * WORK(IU) is LDA by N and WORK(IR) is N by N
1179 *
1180  ldwrku = lda
1181  ir = iu + ldwrku*n
1182  ldwrkr = n
1183  ELSE
1184 *
1185 * WORK(IU) is N by N and WORK(IR) is N by N
1186 *
1187  ldwrku = n
1188  ir = iu + ldwrku*n
1189  ldwrkr = n
1190  END IF
1191  itau = ir + ldwrkr*n
1192  iwork = itau + n
1193 *
1194 * Compute A=Q*R
1195 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1196 *
1197  CALL sgeqrf( m, n, a, lda, work( itau ),
1198  $ work( iwork ), lwork-iwork+1, ierr )
1199 *
1200 * Copy R to WORK(IU), zeroing out below it
1201 *
1202  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1203  $ ldwrku )
1204  CALL slaset( 'L', n-1, n-1, zero, zero,
1205  $ work( iu+1 ), ldwrku )
1206 *
1207 * Generate Q in A
1208 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1209 *
1210  CALL sorgqr( m, n, n, a, lda, work( itau ),
1211  $ work( iwork ), lwork-iwork+1, ierr )
1212  ie = itau
1213  itauq = ie + n
1214  itaup = itauq + n
1215  iwork = itaup + n
1216 *
1217 * Bidiagonalize R in WORK(IU), copying result to
1218 * WORK(IR)
1219 * (Workspace: need 2*N*N+4*N,
1220 * prefer 2*N*N+3*N+2*N*NB)
1221 *
1222  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1223  $ work( ie ), work( itauq ),
1224  $ work( itaup ), work( iwork ),
1225  $ lwork-iwork+1, ierr )
1226  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1227  $ work( ir ), ldwrkr )
1228 *
1229 * Generate left bidiagonalizing vectors in WORK(IU)
1230 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1231 *
1232  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1233  $ work( itauq ), work( iwork ),
1234  $ lwork-iwork+1, ierr )
1235 *
1236 * Generate right bidiagonalizing vectors in WORK(IR)
1237 * (Workspace: need 2*N*N+4*N-1,
1238 * prefer 2*N*N+3*N+(N-1)*NB)
1239 *
1240  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1241  $ work( itaup ), work( iwork ),
1242  $ lwork-iwork+1, ierr )
1243  iwork = ie + n
1244 *
1245 * Perform bidiagonal QR iteration, computing left
1246 * singular vectors of R in WORK(IU) and computing
1247 * right singular vectors of R in WORK(IR)
1248 * (Workspace: need 2*N*N+BDSPAC)
1249 *
1250  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1251  $ work( ir ), ldwrkr, work( iu ),
1252  $ ldwrku, dum, 1, work( iwork ), info )
1253 *
1254 * Multiply Q in A by left singular vectors of R in
1255 * WORK(IU), storing result in U
1256 * (Workspace: need N*N)
1257 *
1258  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1259  $ work( iu ), ldwrku, zero, u, ldu )
1260 *
1261 * Copy right singular vectors of R to A
1262 * (Workspace: need N*N)
1263 *
1264  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1265  $ lda )
1266 *
1267  ELSE
1268 *
1269 * Insufficient workspace for a fast algorithm
1270 *
1271  itau = 1
1272  iwork = itau + n
1273 *
1274 * Compute A=Q*R, copying result to U
1275 * (Workspace: need 2*N, prefer N+N*NB)
1276 *
1277  CALL sgeqrf( m, n, a, lda, work( itau ),
1278  $ work( iwork ), lwork-iwork+1, ierr )
1279  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1280 *
1281 * Generate Q in U
1282 * (Workspace: need 2*N, prefer N+N*NB)
1283 *
1284  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1285  $ work( iwork ), lwork-iwork+1, ierr )
1286  ie = itau
1287  itauq = ie + n
1288  itaup = itauq + n
1289  iwork = itaup + n
1290 *
1291 * Zero out below R in A
1292 *
1293  IF( n .GT. 1 ) THEN
1294  CALL slaset( 'L', n-1, n-1, zero, zero,
1295  $ a( 2, 1 ), lda )
1296  END IF
1297 *
1298 * Bidiagonalize R in A
1299 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1300 *
1301  CALL sgebrd( n, n, a, lda, s, work( ie ),
1302  $ work( itauq ), work( itaup ),
1303  $ work( iwork ), lwork-iwork+1, ierr )
1304 *
1305 * Multiply Q in U by left vectors bidiagonalizing R
1306 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1307 *
1308  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1309  $ work( itauq ), u, ldu, work( iwork ),
1310  $ lwork-iwork+1, ierr )
1311 *
1312 * Generate right vectors bidiagonalizing R in A
1313 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1314 *
1315  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1316  $ work( iwork ), lwork-iwork+1, ierr )
1317  iwork = ie + n
1318 *
1319 * Perform bidiagonal QR iteration, computing left
1320 * singular vectors of A in U and computing right
1321 * singular vectors of A in A
1322 * (Workspace: need BDSPAC)
1323 *
1324  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1325  $ lda, u, ldu, dum, 1, work( iwork ),
1326  $ info )
1327 *
1328  END IF
1329 *
1330  ELSE IF( wntvas ) THEN
1331 *
1332 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1333 * or 'A')
1334 * N left singular vectors to be computed in U and
1335 * N right singular vectors to be computed in VT
1336 *
1337  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1338 *
1339 * Sufficient workspace for a fast algorithm
1340 *
1341  iu = 1
1342  IF( lwork.GE.wrkbl+lda*n ) THEN
1343 *
1344 * WORK(IU) is LDA by N
1345 *
1346  ldwrku = lda
1347  ELSE
1348 *
1349 * WORK(IU) is N by N
1350 *
1351  ldwrku = n
1352  END IF
1353  itau = iu + ldwrku*n
1354  iwork = itau + n
1355 *
1356 * Compute A=Q*R
1357 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1358 *
1359  CALL sgeqrf( m, n, a, lda, work( itau ),
1360  $ work( iwork ), lwork-iwork+1, ierr )
1361 *
1362 * Copy R to WORK(IU), zeroing out below it
1363 *
1364  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1365  $ ldwrku )
1366  CALL slaset( 'L', n-1, n-1, zero, zero,
1367  $ work( iu+1 ), ldwrku )
1368 *
1369 * Generate Q in A
1370 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1371 *
1372  CALL sorgqr( m, n, n, a, lda, work( itau ),
1373  $ work( iwork ), lwork-iwork+1, ierr )
1374  ie = itau
1375  itauq = ie + n
1376  itaup = itauq + n
1377  iwork = itaup + n
1378 *
1379 * Bidiagonalize R in WORK(IU), copying result to VT
1380 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1381 *
1382  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1383  $ work( ie ), work( itauq ),
1384  $ work( itaup ), work( iwork ),
1385  $ lwork-iwork+1, ierr )
1386  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1387  $ ldvt )
1388 *
1389 * Generate left bidiagonalizing vectors in WORK(IU)
1390 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1391 *
1392  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1393  $ work( itauq ), work( iwork ),
1394  $ lwork-iwork+1, ierr )
1395 *
1396 * Generate right bidiagonalizing vectors in VT
1397 * (Workspace: need N*N+4*N-1,
1398 * prefer N*N+3*N+(N-1)*NB)
1399 *
1400  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1401  $ work( iwork ), lwork-iwork+1, ierr )
1402  iwork = ie + n
1403 *
1404 * Perform bidiagonal QR iteration, computing left
1405 * singular vectors of R in WORK(IU) and computing
1406 * right singular vectors of R in VT
1407 * (Workspace: need N*N+BDSPAC)
1408 *
1409  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1410  $ ldvt, work( iu ), ldwrku, dum, 1,
1411  $ work( iwork ), info )
1412 *
1413 * Multiply Q in A by left singular vectors of R in
1414 * WORK(IU), storing result in U
1415 * (Workspace: need N*N)
1416 *
1417  CALL sgemm( 'N', 'N', m, n, n, one, a, lda,
1418  $ work( iu ), ldwrku, zero, u, ldu )
1419 *
1420  ELSE
1421 *
1422 * Insufficient workspace for a fast algorithm
1423 *
1424  itau = 1
1425  iwork = itau + n
1426 *
1427 * Compute A=Q*R, copying result to U
1428 * (Workspace: need 2*N, prefer N+N*NB)
1429 *
1430  CALL sgeqrf( m, n, a, lda, work( itau ),
1431  $ work( iwork ), lwork-iwork+1, ierr )
1432  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1433 *
1434 * Generate Q in U
1435 * (Workspace: need 2*N, prefer N+N*NB)
1436 *
1437  CALL sorgqr( m, n, n, u, ldu, work( itau ),
1438  $ work( iwork ), lwork-iwork+1, ierr )
1439 *
1440 * Copy R to VT, zeroing out below it
1441 *
1442  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1443  IF( n.GT.1 )
1444  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1445  $ vt( 2, 1 ), ldvt )
1446  ie = itau
1447  itauq = ie + n
1448  itaup = itauq + n
1449  iwork = itaup + n
1450 *
1451 * Bidiagonalize R in VT
1452 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1453 *
1454  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1455  $ work( itauq ), work( itaup ),
1456  $ work( iwork ), lwork-iwork+1, ierr )
1457 *
1458 * Multiply Q in U by left bidiagonalizing vectors
1459 * in VT
1460 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1461 *
1462  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1463  $ work( itauq ), u, ldu, work( iwork ),
1464  $ lwork-iwork+1, ierr )
1465 *
1466 * Generate right bidiagonalizing vectors in VT
1467 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1468 *
1469  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1470  $ work( iwork ), lwork-iwork+1, ierr )
1471  iwork = ie + n
1472 *
1473 * Perform bidiagonal QR iteration, computing left
1474 * singular vectors of A in U and computing right
1475 * singular vectors of A in VT
1476 * (Workspace: need BDSPAC)
1477 *
1478  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1479  $ ldvt, u, ldu, dum, 1, work( iwork ),
1480  $ info )
1481 *
1482  END IF
1483 *
1484  END IF
1485 *
1486  ELSE IF( wntua ) THEN
1487 *
1488  IF( wntvn ) THEN
1489 *
1490 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1491 * M left singular vectors to be computed in U and
1492 * no right singular vectors to be computed
1493 *
1494  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1495 *
1496 * Sufficient workspace for a fast algorithm
1497 *
1498  ir = 1
1499  IF( lwork.GE.wrkbl+lda*n ) THEN
1500 *
1501 * WORK(IR) is LDA by N
1502 *
1503  ldwrkr = lda
1504  ELSE
1505 *
1506 * WORK(IR) is N by N
1507 *
1508  ldwrkr = n
1509  END IF
1510  itau = ir + ldwrkr*n
1511  iwork = itau + n
1512 *
1513 * Compute A=Q*R, copying result to U
1514 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1515 *
1516  CALL sgeqrf( m, n, a, lda, work( itau ),
1517  $ work( iwork ), lwork-iwork+1, ierr )
1518  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1519 *
1520 * Copy R to WORK(IR), zeroing out below it
1521 *
1522  CALL slacpy( 'U', n, n, a, lda, work( ir ),
1523  $ ldwrkr )
1524  CALL slaset( 'L', n-1, n-1, zero, zero,
1525  $ work( ir+1 ), ldwrkr )
1526 *
1527 * Generate Q in U
1528 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1529 *
1530  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1531  $ work( iwork ), lwork-iwork+1, ierr )
1532  ie = itau
1533  itauq = ie + n
1534  itaup = itauq + n
1535  iwork = itaup + n
1536 *
1537 * Bidiagonalize R in WORK(IR)
1538 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1539 *
1540  CALL sgebrd( n, n, work( ir ), ldwrkr, s,
1541  $ work( ie ), work( itauq ),
1542  $ work( itaup ), work( iwork ),
1543  $ lwork-iwork+1, ierr )
1544 *
1545 * Generate left bidiagonalizing vectors in WORK(IR)
1546 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1547 *
1548  CALL sorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1549  $ work( itauq ), work( iwork ),
1550  $ lwork-iwork+1, ierr )
1551  iwork = ie + n
1552 *
1553 * Perform bidiagonal QR iteration, computing left
1554 * singular vectors of R in WORK(IR)
1555 * (Workspace: need N*N+BDSPAC)
1556 *
1557  CALL sbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1558  $ 1, work( ir ), ldwrkr, dum, 1,
1559  $ work( iwork ), info )
1560 *
1561 * Multiply Q in U by left singular vectors of R in
1562 * WORK(IR), storing result in A
1563 * (Workspace: need N*N)
1564 *
1565  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1566  $ work( ir ), ldwrkr, zero, a, lda )
1567 *
1568 * Copy left singular vectors of A from A to U
1569 *
1570  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1571 *
1572  ELSE
1573 *
1574 * Insufficient workspace for a fast algorithm
1575 *
1576  itau = 1
1577  iwork = itau + n
1578 *
1579 * Compute A=Q*R, copying result to U
1580 * (Workspace: need 2*N, prefer N+N*NB)
1581 *
1582  CALL sgeqrf( m, n, a, lda, work( itau ),
1583  $ work( iwork ), lwork-iwork+1, ierr )
1584  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1585 *
1586 * Generate Q in U
1587 * (Workspace: need N+M, prefer N+M*NB)
1588 *
1589  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1590  $ work( iwork ), lwork-iwork+1, ierr )
1591  ie = itau
1592  itauq = ie + n
1593  itaup = itauq + n
1594  iwork = itaup + n
1595 *
1596 * Zero out below R in A
1597 *
1598  IF( n .GT. 1 ) THEN
1599  CALL slaset( 'L', n-1, n-1, zero, zero,
1600  $ a( 2, 1 ), lda )
1601  END IF
1602 *
1603 * Bidiagonalize R in A
1604 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1605 *
1606  CALL sgebrd( n, n, a, lda, s, work( ie ),
1607  $ work( itauq ), work( itaup ),
1608  $ work( iwork ), lwork-iwork+1, ierr )
1609 *
1610 * Multiply Q in U by left bidiagonalizing vectors
1611 * in A
1612 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1613 *
1614  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1615  $ work( itauq ), u, ldu, work( iwork ),
1616  $ lwork-iwork+1, ierr )
1617  iwork = ie + n
1618 *
1619 * Perform bidiagonal QR iteration, computing left
1620 * singular vectors of A in U
1621 * (Workspace: need BDSPAC)
1622 *
1623  CALL sbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1624  $ 1, u, ldu, dum, 1, work( iwork ),
1625  $ info )
1626 *
1627  END IF
1628 *
1629  ELSE IF( wntvo ) THEN
1630 *
1631 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1632 * M left singular vectors to be computed in U and
1633 * N right singular vectors to be overwritten on A
1634 *
1635  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1636 *
1637 * Sufficient workspace for a fast algorithm
1638 *
1639  iu = 1
1640  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1641 *
1642 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1643 *
1644  ldwrku = lda
1645  ir = iu + ldwrku*n
1646  ldwrkr = lda
1647  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1648 *
1649 * WORK(IU) is LDA by N and WORK(IR) is N by N
1650 *
1651  ldwrku = lda
1652  ir = iu + ldwrku*n
1653  ldwrkr = n
1654  ELSE
1655 *
1656 * WORK(IU) is N by N and WORK(IR) is N by N
1657 *
1658  ldwrku = n
1659  ir = iu + ldwrku*n
1660  ldwrkr = n
1661  END IF
1662  itau = ir + ldwrkr*n
1663  iwork = itau + n
1664 *
1665 * Compute A=Q*R, copying result to U
1666 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1667 *
1668  CALL sgeqrf( m, n, a, lda, work( itau ),
1669  $ work( iwork ), lwork-iwork+1, ierr )
1670  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1671 *
1672 * Generate Q in U
1673 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1674 *
1675  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1676  $ work( iwork ), lwork-iwork+1, ierr )
1677 *
1678 * Copy R to WORK(IU), zeroing out below it
1679 *
1680  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1681  $ ldwrku )
1682  CALL slaset( 'L', n-1, n-1, zero, zero,
1683  $ work( iu+1 ), ldwrku )
1684  ie = itau
1685  itauq = ie + n
1686  itaup = itauq + n
1687  iwork = itaup + n
1688 *
1689 * Bidiagonalize R in WORK(IU), copying result to
1690 * WORK(IR)
1691 * (Workspace: need 2*N*N+4*N,
1692 * prefer 2*N*N+3*N+2*N*NB)
1693 *
1694  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1695  $ work( ie ), work( itauq ),
1696  $ work( itaup ), work( iwork ),
1697  $ lwork-iwork+1, ierr )
1698  CALL slacpy( 'U', n, n, work( iu ), ldwrku,
1699  $ work( ir ), ldwrkr )
1700 *
1701 * Generate left bidiagonalizing vectors in WORK(IU)
1702 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1703 *
1704  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1705  $ work( itauq ), work( iwork ),
1706  $ lwork-iwork+1, ierr )
1707 *
1708 * Generate right bidiagonalizing vectors in WORK(IR)
1709 * (Workspace: need 2*N*N+4*N-1,
1710 * prefer 2*N*N+3*N+(N-1)*NB)
1711 *
1712  CALL sorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1713  $ work( itaup ), work( iwork ),
1714  $ lwork-iwork+1, ierr )
1715  iwork = ie + n
1716 *
1717 * Perform bidiagonal QR iteration, computing left
1718 * singular vectors of R in WORK(IU) and computing
1719 * right singular vectors of R in WORK(IR)
1720 * (Workspace: need 2*N*N+BDSPAC)
1721 *
1722  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ),
1723  $ work( ir ), ldwrkr, work( iu ),
1724  $ ldwrku, dum, 1, work( iwork ), info )
1725 *
1726 * Multiply Q in U by left singular vectors of R in
1727 * WORK(IU), storing result in A
1728 * (Workspace: need N*N)
1729 *
1730  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1731  $ work( iu ), ldwrku, zero, a, lda )
1732 *
1733 * Copy left singular vectors of A from A to U
1734 *
1735  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1736 *
1737 * Copy right singular vectors of R from WORK(IR) to A
1738 *
1739  CALL slacpy( 'F', n, n, work( ir ), ldwrkr, a,
1740  $ lda )
1741 *
1742  ELSE
1743 *
1744 * Insufficient workspace for a fast algorithm
1745 *
1746  itau = 1
1747  iwork = itau + n
1748 *
1749 * Compute A=Q*R, copying result to U
1750 * (Workspace: need 2*N, prefer N+N*NB)
1751 *
1752  CALL sgeqrf( m, n, a, lda, work( itau ),
1753  $ work( iwork ), lwork-iwork+1, ierr )
1754  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1755 *
1756 * Generate Q in U
1757 * (Workspace: need N+M, prefer N+M*NB)
1758 *
1759  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1760  $ work( iwork ), lwork-iwork+1, ierr )
1761  ie = itau
1762  itauq = ie + n
1763  itaup = itauq + n
1764  iwork = itaup + n
1765 *
1766 * Zero out below R in A
1767 *
1768  IF( n .GT. 1 ) THEN
1769  CALL slaset( 'L', n-1, n-1, zero, zero,
1770  $ a( 2, 1 ), lda )
1771  END IF
1772 *
1773 * Bidiagonalize R in A
1774 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1775 *
1776  CALL sgebrd( n, n, a, lda, s, work( ie ),
1777  $ work( itauq ), work( itaup ),
1778  $ work( iwork ), lwork-iwork+1, ierr )
1779 *
1780 * Multiply Q in U by left bidiagonalizing vectors
1781 * in A
1782 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1783 *
1784  CALL sormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1785  $ work( itauq ), u, ldu, work( iwork ),
1786  $ lwork-iwork+1, ierr )
1787 *
1788 * Generate right bidiagonalizing vectors in A
1789 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1790 *
1791  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
1792  $ work( iwork ), lwork-iwork+1, ierr )
1793  iwork = ie + n
1794 *
1795 * Perform bidiagonal QR iteration, computing left
1796 * singular vectors of A in U and computing right
1797 * singular vectors of A in A
1798 * (Workspace: need BDSPAC)
1799 *
1800  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1801  $ lda, u, ldu, dum, 1, work( iwork ),
1802  $ info )
1803 *
1804  END IF
1805 *
1806  ELSE IF( wntvas ) THEN
1807 *
1808 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1809 * or 'A')
1810 * M left singular vectors to be computed in U and
1811 * N right singular vectors to be computed in VT
1812 *
1813  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1814 *
1815 * Sufficient workspace for a fast algorithm
1816 *
1817  iu = 1
1818  IF( lwork.GE.wrkbl+lda*n ) THEN
1819 *
1820 * WORK(IU) is LDA by N
1821 *
1822  ldwrku = lda
1823  ELSE
1824 *
1825 * WORK(IU) is N by N
1826 *
1827  ldwrku = n
1828  END IF
1829  itau = iu + ldwrku*n
1830  iwork = itau + n
1831 *
1832 * Compute A=Q*R, copying result to U
1833 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1834 *
1835  CALL sgeqrf( m, n, a, lda, work( itau ),
1836  $ work( iwork ), lwork-iwork+1, ierr )
1837  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1838 *
1839 * Generate Q in U
1840 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1841 *
1842  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1843  $ work( iwork ), lwork-iwork+1, ierr )
1844 *
1845 * Copy R to WORK(IU), zeroing out below it
1846 *
1847  CALL slacpy( 'U', n, n, a, lda, work( iu ),
1848  $ ldwrku )
1849  CALL slaset( 'L', n-1, n-1, zero, zero,
1850  $ work( iu+1 ), ldwrku )
1851  ie = itau
1852  itauq = ie + n
1853  itaup = itauq + n
1854  iwork = itaup + n
1855 *
1856 * Bidiagonalize R in WORK(IU), copying result to VT
1857 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1858 *
1859  CALL sgebrd( n, n, work( iu ), ldwrku, s,
1860  $ work( ie ), work( itauq ),
1861  $ work( itaup ), work( iwork ),
1862  $ lwork-iwork+1, ierr )
1863  CALL slacpy( 'U', n, n, work( iu ), ldwrku, vt,
1864  $ ldvt )
1865 *
1866 * Generate left bidiagonalizing vectors in WORK(IU)
1867 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1868 *
1869  CALL sorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1870  $ work( itauq ), work( iwork ),
1871  $ lwork-iwork+1, ierr )
1872 *
1873 * Generate right bidiagonalizing vectors in VT
1874 * (Workspace: need N*N+4*N-1,
1875 * prefer N*N+3*N+(N-1)*NB)
1876 *
1877  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1878  $ work( iwork ), lwork-iwork+1, ierr )
1879  iwork = ie + n
1880 *
1881 * Perform bidiagonal QR iteration, computing left
1882 * singular vectors of R in WORK(IU) and computing
1883 * right singular vectors of R in VT
1884 * (Workspace: need N*N+BDSPAC)
1885 *
1886  CALL sbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1887  $ ldvt, work( iu ), ldwrku, dum, 1,
1888  $ work( iwork ), info )
1889 *
1890 * Multiply Q in U by left singular vectors of R in
1891 * WORK(IU), storing result in A
1892 * (Workspace: need N*N)
1893 *
1894  CALL sgemm( 'N', 'N', m, n, n, one, u, ldu,
1895  $ work( iu ), ldwrku, zero, a, lda )
1896 *
1897 * Copy left singular vectors of A from A to U
1898 *
1899  CALL slacpy( 'F', m, n, a, lda, u, ldu )
1900 *
1901  ELSE
1902 *
1903 * Insufficient workspace for a fast algorithm
1904 *
1905  itau = 1
1906  iwork = itau + n
1907 *
1908 * Compute A=Q*R, copying result to U
1909 * (Workspace: need 2*N, prefer N+N*NB)
1910 *
1911  CALL sgeqrf( m, n, a, lda, work( itau ),
1912  $ work( iwork ), lwork-iwork+1, ierr )
1913  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1914 *
1915 * Generate Q in U
1916 * (Workspace: need N+M, prefer N+M*NB)
1917 *
1918  CALL sorgqr( m, m, n, u, ldu, work( itau ),
1919  $ work( iwork ), lwork-iwork+1, ierr )
1920 *
1921 * Copy R from A to VT, zeroing out below it
1922 *
1923  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
1924  IF( n.GT.1 )
1925  $ CALL slaset( 'L', n-1, n-1, zero, zero,
1926  $ vt( 2, 1 ), ldvt )
1927  ie = itau
1928  itauq = ie + n
1929  itaup = itauq + n
1930  iwork = itaup + n
1931 *
1932 * Bidiagonalize R in VT
1933 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1934 *
1935  CALL sgebrd( n, n, vt, ldvt, s, work( ie ),
1936  $ work( itauq ), work( itaup ),
1937  $ work( iwork ), lwork-iwork+1, ierr )
1938 *
1939 * Multiply Q in U by left bidiagonalizing vectors
1940 * in VT
1941 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1942 *
1943  CALL sormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1944  $ work( itauq ), u, ldu, work( iwork ),
1945  $ lwork-iwork+1, ierr )
1946 *
1947 * Generate right bidiagonalizing vectors in VT
1948 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1949 *
1950  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1951  $ work( iwork ), lwork-iwork+1, ierr )
1952  iwork = ie + n
1953 *
1954 * Perform bidiagonal QR iteration, computing left
1955 * singular vectors of A in U and computing right
1956 * singular vectors of A in VT
1957 * (Workspace: need BDSPAC)
1958 *
1959  CALL sbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1960  $ ldvt, u, ldu, dum, 1, work( iwork ),
1961  $ info )
1962 *
1963  END IF
1964 *
1965  END IF
1966 *
1967  END IF
1968 *
1969  ELSE
1970 *
1971 * M .LT. MNTHR
1972 *
1973 * Path 10 (M at least N, but not much larger)
1974 * Reduce to bidiagonal form without QR decomposition
1975 *
1976  ie = 1
1977  itauq = ie + n
1978  itaup = itauq + n
1979  iwork = itaup + n
1980 *
1981 * Bidiagonalize A
1982 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1983 *
1984  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1985  $ work( itaup ), work( iwork ), lwork-iwork+1,
1986  $ ierr )
1987  IF( wntuas ) THEN
1988 *
1989 * If left singular vectors desired in U, copy result to U
1990 * and generate left bidiagonalizing vectors in U
1991 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1992 *
1993  CALL slacpy( 'L', m, n, a, lda, u, ldu )
1994  IF( wntus )
1995  $ ncu = n
1996  IF( wntua )
1997  $ ncu = m
1998  CALL sorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1999  $ work( iwork ), lwork-iwork+1, ierr )
2000  END IF
2001  IF( wntvas ) THEN
2002 *
2003 * If right singular vectors desired in VT, copy result to
2004 * VT and generate right bidiagonalizing vectors in VT
2005 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2006 *
2007  CALL slacpy( 'U', n, n, a, lda, vt, ldvt )
2008  CALL sorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2009  $ work( iwork ), lwork-iwork+1, ierr )
2010  END IF
2011  IF( wntuo ) THEN
2012 *
2013 * If left singular vectors desired in A, generate left
2014 * bidiagonalizing vectors in A
2015 * (Workspace: need 4*N, prefer 3*N+N*NB)
2016 *
2017  CALL sorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2018  $ work( iwork ), lwork-iwork+1, ierr )
2019  END IF
2020  IF( wntvo ) THEN
2021 *
2022 * If right singular vectors desired in A, generate right
2023 * bidiagonalizing vectors in A
2024 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2025 *
2026  CALL sorgbr( 'P', n, n, n, a, lda, work( itaup ),
2027  $ work( iwork ), lwork-iwork+1, ierr )
2028  END IF
2029  iwork = ie + n
2030  IF( wntuas .OR. wntuo )
2031  $ nru = m
2032  IF( wntun )
2033  $ nru = 0
2034  IF( wntvas .OR. wntvo )
2035  $ ncvt = n
2036  IF( wntvn )
2037  $ ncvt = 0
2038  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2039 *
2040 * Perform bidiagonal QR iteration, if desired, computing
2041 * left singular vectors in U and computing right singular
2042 * vectors in VT
2043 * (Workspace: need BDSPAC)
2044 *
2045  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2046  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2047  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2048 *
2049 * Perform bidiagonal QR iteration, if desired, computing
2050 * left singular vectors in U and computing right singular
2051 * vectors in A
2052 * (Workspace: need BDSPAC)
2053 *
2054  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2055  $ u, ldu, dum, 1, work( iwork ), info )
2056  ELSE
2057 *
2058 * Perform bidiagonal QR iteration, if desired, computing
2059 * left singular vectors in A and computing right singular
2060 * vectors in VT
2061 * (Workspace: need BDSPAC)
2062 *
2063  CALL sbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2064  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2065  END IF
2066 *
2067  END IF
2068 *
2069  ELSE
2070 *
2071 * A has more columns than rows. If A has sufficiently more
2072 * columns than rows, first reduce using the LQ decomposition (if
2073 * sufficient workspace available)
2074 *
2075  IF( n.GE.mnthr ) THEN
2076 *
2077  IF( wntvn ) THEN
2078 *
2079 * Path 1t(N much larger than M, JOBVT='N')
2080 * No right singular vectors to be computed
2081 *
2082  itau = 1
2083  iwork = itau + m
2084 *
2085 * Compute A=L*Q
2086 * (Workspace: need 2*M, prefer M+M*NB)
2087 *
2088  CALL sgelqf( m, n, a, lda, work( itau ), work( iwork ),
2089  $ lwork-iwork+1, ierr )
2090 *
2091 * Zero out above L
2092 *
2093  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2094  ie = 1
2095  itauq = ie + m
2096  itaup = itauq + m
2097  iwork = itaup + m
2098 *
2099 * Bidiagonalize L in A
2100 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2101 *
2102  CALL sgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2103  $ work( itaup ), work( iwork ), lwork-iwork+1,
2104  $ ierr )
2105  IF( wntuo .OR. wntuas ) THEN
2106 *
2107 * If left singular vectors desired, generate Q
2108 * (Workspace: need 4*M, prefer 3*M+M*NB)
2109 *
2110  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2111  $ work( iwork ), lwork-iwork+1, ierr )
2112  END IF
2113  iwork = ie + m
2114  nru = 0
2115  IF( wntuo .OR. wntuas )
2116  $ nru = m
2117 *
2118 * Perform bidiagonal QR iteration, computing left singular
2119 * vectors of A in A if desired
2120 * (Workspace: need BDSPAC)
2121 *
2122  CALL sbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2123  $ lda, dum, 1, work( iwork ), info )
2124 *
2125 * If left singular vectors desired in U, copy them there
2126 *
2127  IF( wntuas )
2128  $ CALL slacpy( 'F', m, m, a, lda, u, ldu )
2129 *
2130  ELSE IF( wntvo .AND. wntun ) THEN
2131 *
2132 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2133 * M right singular vectors to be overwritten on A and
2134 * no left singular vectors to be computed
2135 *
2136  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2137 *
2138 * Sufficient workspace for a fast algorithm
2139 *
2140  ir = 1
2141  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2142 *
2143 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2144 *
2145  ldwrku = lda
2146  chunk = n
2147  ldwrkr = lda
2148  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2149 *
2150 * WORK(IU) is LDA by N and WORK(IR) is M by M
2151 *
2152  ldwrku = lda
2153  chunk = n
2154  ldwrkr = m
2155  ELSE
2156 *
2157 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2158 *
2159  ldwrku = m
2160  chunk = ( lwork-m*m-m ) / m
2161  ldwrkr = m
2162  END IF
2163  itau = ir + ldwrkr*m
2164  iwork = itau + m
2165 *
2166 * Compute A=L*Q
2167 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2168 *
2169  CALL sgelqf( m, n, a, lda, work( itau ),
2170  $ work( iwork ), lwork-iwork+1, ierr )
2171 *
2172 * Copy L to WORK(IR) and zero out above it
2173 *
2174  CALL slacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2175  CALL slaset( 'U', m-1, m-1, zero, zero,
2176  $ work( ir+ldwrkr ), ldwrkr )
2177 *
2178 * Generate Q in A
2179 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2180 *
2181  CALL sorglq( m, n, m, a, lda, work( itau ),
2182  $ work( iwork ), lwork-iwork+1, ierr )
2183  ie = itau
2184  itauq = ie + m
2185  itaup = itauq + m
2186  iwork = itaup + m
2187 *
2188 * Bidiagonalize L in WORK(IR)
2189 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2190 *
2191  CALL sgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2192  $ work( itauq ), work( itaup ),
2193  $ work( iwork ), lwork-iwork+1, ierr )
2194 *
2195 * Generate right vectors bidiagonalizing L
2196 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2197 *
2198  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2199  $ work( itaup ), work( iwork ),
2200  $ lwork-iwork+1, ierr )
2201  iwork = ie + m
2202 *
2203 * Perform bidiagonal QR iteration, computing right
2204 * singular vectors of L in WORK(IR)
2205 * (Workspace: need M*M+BDSPAC)
2206 *
2207  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2208  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2209  $ work( iwork ), info )
2210  iu = ie + m
2211 *
2212 * Multiply right singular vectors of L in WORK(IR) by Q
2213 * in A, storing result in WORK(IU) and copying to A
2214 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2215 *
2216  DO 30 i = 1, n, chunk
2217  blk = min( n-i+1, chunk )
2218  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2219  $ ldwrkr, a( 1, i ), lda, zero,
2220  $ work( iu ), ldwrku )
2221  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2222  $ a( 1, i ), lda )
2223  30 CONTINUE
2224 *
2225  ELSE
2226 *
2227 * Insufficient workspace for a fast algorithm
2228 *
2229  ie = 1
2230  itauq = ie + m
2231  itaup = itauq + m
2232  iwork = itaup + m
2233 *
2234 * Bidiagonalize A
2235 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2236 *
2237  CALL sgebrd( m, n, a, lda, s, work( ie ),
2238  $ work( itauq ), work( itaup ),
2239  $ work( iwork ), lwork-iwork+1, ierr )
2240 *
2241 * Generate right vectors bidiagonalizing A
2242 * (Workspace: need 4*M, prefer 3*M+M*NB)
2243 *
2244  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
2245  $ work( iwork ), lwork-iwork+1, ierr )
2246  iwork = ie + m
2247 *
2248 * Perform bidiagonal QR iteration, computing right
2249 * singular vectors of A in A
2250 * (Workspace: need BDSPAC)
2251 *
2252  CALL sbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2253  $ dum, 1, dum, 1, work( iwork ), info )
2254 *
2255  END IF
2256 *
2257  ELSE IF( wntvo .AND. wntuas ) THEN
2258 *
2259 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2260 * M right singular vectors to be overwritten on A and
2261 * M left singular vectors to be computed in U
2262 *
2263  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2264 *
2265 * Sufficient workspace for a fast algorithm
2266 *
2267  ir = 1
2268  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2269 *
2270 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2271 *
2272  ldwrku = lda
2273  chunk = n
2274  ldwrkr = lda
2275  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2276 *
2277 * WORK(IU) is LDA by N and WORK(IR) is M by M
2278 *
2279  ldwrku = lda
2280  chunk = n
2281  ldwrkr = m
2282  ELSE
2283 *
2284 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2285 *
2286  ldwrku = m
2287  chunk = ( lwork-m*m-m ) / m
2288  ldwrkr = m
2289  END IF
2290  itau = ir + ldwrkr*m
2291  iwork = itau + m
2292 *
2293 * Compute A=L*Q
2294 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2295 *
2296  CALL sgelqf( m, n, a, lda, work( itau ),
2297  $ work( iwork ), lwork-iwork+1, ierr )
2298 *
2299 * Copy L to U, zeroing about above it
2300 *
2301  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2302  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2303  $ ldu )
2304 *
2305 * Generate Q in A
2306 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2307 *
2308  CALL sorglq( m, n, m, a, lda, work( itau ),
2309  $ work( iwork ), lwork-iwork+1, ierr )
2310  ie = itau
2311  itauq = ie + m
2312  itaup = itauq + m
2313  iwork = itaup + m
2314 *
2315 * Bidiagonalize L in U, copying result to WORK(IR)
2316 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2317 *
2318  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2319  $ work( itauq ), work( itaup ),
2320  $ work( iwork ), lwork-iwork+1, ierr )
2321  CALL slacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2322 *
2323 * Generate right vectors bidiagonalizing L in WORK(IR)
2324 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2325 *
2326  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2327  $ work( itaup ), work( iwork ),
2328  $ lwork-iwork+1, ierr )
2329 *
2330 * Generate left vectors bidiagonalizing L in U
2331 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2332 *
2333  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2334  $ work( iwork ), lwork-iwork+1, ierr )
2335  iwork = ie + m
2336 *
2337 * Perform bidiagonal QR iteration, computing left
2338 * singular vectors of L in U, and computing right
2339 * singular vectors of L in WORK(IR)
2340 * (Workspace: need M*M+BDSPAC)
2341 *
2342  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2343  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2344  $ work( iwork ), info )
2345  iu = ie + m
2346 *
2347 * Multiply right singular vectors of L in WORK(IR) by Q
2348 * in A, storing result in WORK(IU) and copying to A
2349 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2350 *
2351  DO 40 i = 1, n, chunk
2352  blk = min( n-i+1, chunk )
2353  CALL sgemm( 'N', 'N', m, blk, m, one, work( ir ),
2354  $ ldwrkr, a( 1, i ), lda, zero,
2355  $ work( iu ), ldwrku )
2356  CALL slacpy( 'F', m, blk, work( iu ), ldwrku,
2357  $ a( 1, i ), lda )
2358  40 CONTINUE
2359 *
2360  ELSE
2361 *
2362 * Insufficient workspace for a fast algorithm
2363 *
2364  itau = 1
2365  iwork = itau + m
2366 *
2367 * Compute A=L*Q
2368 * (Workspace: need 2*M, prefer M+M*NB)
2369 *
2370  CALL sgelqf( m, n, a, lda, work( itau ),
2371  $ work( iwork ), lwork-iwork+1, ierr )
2372 *
2373 * Copy L to U, zeroing out above it
2374 *
2375  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2376  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2377  $ ldu )
2378 *
2379 * Generate Q in A
2380 * (Workspace: need 2*M, prefer M+M*NB)
2381 *
2382  CALL sorglq( m, n, m, a, lda, work( itau ),
2383  $ work( iwork ), lwork-iwork+1, ierr )
2384  ie = itau
2385  itauq = ie + m
2386  itaup = itauq + m
2387  iwork = itaup + m
2388 *
2389 * Bidiagonalize L in U
2390 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2391 *
2392  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2393  $ work( itauq ), work( itaup ),
2394  $ work( iwork ), lwork-iwork+1, ierr )
2395 *
2396 * Multiply right vectors bidiagonalizing L by Q in A
2397 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2398 *
2399  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2400  $ work( itaup ), a, lda, work( iwork ),
2401  $ lwork-iwork+1, ierr )
2402 *
2403 * Generate left vectors bidiagonalizing L in U
2404 * (Workspace: need 4*M, prefer 3*M+M*NB)
2405 *
2406  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2407  $ work( iwork ), lwork-iwork+1, ierr )
2408  iwork = ie + m
2409 *
2410 * Perform bidiagonal QR iteration, computing left
2411 * singular vectors of A in U and computing right
2412 * singular vectors of A in A
2413 * (Workspace: need BDSPAC)
2414 *
2415  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2416  $ u, ldu, dum, 1, work( iwork ), info )
2417 *
2418  END IF
2419 *
2420  ELSE IF( wntvs ) THEN
2421 *
2422  IF( wntun ) THEN
2423 *
2424 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2425 * M right singular vectors to be computed in VT and
2426 * no left singular vectors to be computed
2427 *
2428  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2429 *
2430 * Sufficient workspace for a fast algorithm
2431 *
2432  ir = 1
2433  IF( lwork.GE.wrkbl+lda*m ) THEN
2434 *
2435 * WORK(IR) is LDA by M
2436 *
2437  ldwrkr = lda
2438  ELSE
2439 *
2440 * WORK(IR) is M by M
2441 *
2442  ldwrkr = m
2443  END IF
2444  itau = ir + ldwrkr*m
2445  iwork = itau + m
2446 *
2447 * Compute A=L*Q
2448 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2449 *
2450  CALL sgelqf( m, n, a, lda, work( itau ),
2451  $ work( iwork ), lwork-iwork+1, ierr )
2452 *
2453 * Copy L to WORK(IR), zeroing out above it
2454 *
2455  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2456  $ ldwrkr )
2457  CALL slaset( 'U', m-1, m-1, zero, zero,
2458  $ work( ir+ldwrkr ), ldwrkr )
2459 *
2460 * Generate Q in A
2461 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2462 *
2463  CALL sorglq( m, n, m, a, lda, work( itau ),
2464  $ work( iwork ), lwork-iwork+1, ierr )
2465  ie = itau
2466  itauq = ie + m
2467  itaup = itauq + m
2468  iwork = itaup + m
2469 *
2470 * Bidiagonalize L in WORK(IR)
2471 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2472 *
2473  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2474  $ work( ie ), work( itauq ),
2475  $ work( itaup ), work( iwork ),
2476  $ lwork-iwork+1, ierr )
2477 *
2478 * Generate right vectors bidiagonalizing L in
2479 * WORK(IR)
2480 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2481 *
2482  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2483  $ work( itaup ), work( iwork ),
2484  $ lwork-iwork+1, ierr )
2485  iwork = ie + m
2486 *
2487 * Perform bidiagonal QR iteration, computing right
2488 * singular vectors of L in WORK(IR)
2489 * (Workspace: need M*M+BDSPAC)
2490 *
2491  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2492  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2493  $ work( iwork ), info )
2494 *
2495 * Multiply right singular vectors of L in WORK(IR) by
2496 * Q in A, storing result in VT
2497 * (Workspace: need M*M)
2498 *
2499  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2500  $ ldwrkr, a, lda, zero, vt, ldvt )
2501 *
2502  ELSE
2503 *
2504 * Insufficient workspace for a fast algorithm
2505 *
2506  itau = 1
2507  iwork = itau + m
2508 *
2509 * Compute A=L*Q
2510 * (Workspace: need 2*M, prefer M+M*NB)
2511 *
2512  CALL sgelqf( m, n, a, lda, work( itau ),
2513  $ work( iwork ), lwork-iwork+1, ierr )
2514 *
2515 * Copy result to VT
2516 *
2517  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2518 *
2519 * Generate Q in VT
2520 * (Workspace: need 2*M, prefer M+M*NB)
2521 *
2522  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2523  $ work( iwork ), lwork-iwork+1, ierr )
2524  ie = itau
2525  itauq = ie + m
2526  itaup = itauq + m
2527  iwork = itaup + m
2528 *
2529 * Zero out above L in A
2530 *
2531  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2532  $ lda )
2533 *
2534 * Bidiagonalize L in A
2535 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2536 *
2537  CALL sgebrd( m, m, a, lda, s, work( ie ),
2538  $ work( itauq ), work( itaup ),
2539  $ work( iwork ), lwork-iwork+1, ierr )
2540 *
2541 * Multiply right vectors bidiagonalizing L by Q in VT
2542 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2543 *
2544  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2545  $ work( itaup ), vt, ldvt,
2546  $ work( iwork ), lwork-iwork+1, ierr )
2547  iwork = ie + m
2548 *
2549 * Perform bidiagonal QR iteration, computing right
2550 * singular vectors of A in VT
2551 * (Workspace: need BDSPAC)
2552 *
2553  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2554  $ ldvt, dum, 1, dum, 1, work( iwork ),
2555  $ info )
2556 *
2557  END IF
2558 *
2559  ELSE IF( wntuo ) THEN
2560 *
2561 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2562 * M right singular vectors to be computed in VT and
2563 * M left singular vectors to be overwritten on A
2564 *
2565  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2566 *
2567 * Sufficient workspace for a fast algorithm
2568 *
2569  iu = 1
2570  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2571 *
2572 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2573 *
2574  ldwrku = lda
2575  ir = iu + ldwrku*m
2576  ldwrkr = lda
2577  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2578 *
2579 * WORK(IU) is LDA by M and WORK(IR) is M by M
2580 *
2581  ldwrku = lda
2582  ir = iu + ldwrku*m
2583  ldwrkr = m
2584  ELSE
2585 *
2586 * WORK(IU) is M by M and WORK(IR) is M by M
2587 *
2588  ldwrku = m
2589  ir = iu + ldwrku*m
2590  ldwrkr = m
2591  END IF
2592  itau = ir + ldwrkr*m
2593  iwork = itau + m
2594 *
2595 * Compute A=L*Q
2596 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2597 *
2598  CALL sgelqf( m, n, a, lda, work( itau ),
2599  $ work( iwork ), lwork-iwork+1, ierr )
2600 *
2601 * Copy L to WORK(IU), zeroing out below it
2602 *
2603  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2604  $ ldwrku )
2605  CALL slaset( 'U', m-1, m-1, zero, zero,
2606  $ work( iu+ldwrku ), ldwrku )
2607 *
2608 * Generate Q in A
2609 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2610 *
2611  CALL sorglq( m, n, m, a, lda, work( itau ),
2612  $ work( iwork ), lwork-iwork+1, ierr )
2613  ie = itau
2614  itauq = ie + m
2615  itaup = itauq + m
2616  iwork = itaup + m
2617 *
2618 * Bidiagonalize L in WORK(IU), copying result to
2619 * WORK(IR)
2620 * (Workspace: need 2*M*M+4*M,
2621 * prefer 2*M*M+3*M+2*M*NB)
2622 *
2623  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2624  $ work( ie ), work( itauq ),
2625  $ work( itaup ), work( iwork ),
2626  $ lwork-iwork+1, ierr )
2627  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
2628  $ work( ir ), ldwrkr )
2629 *
2630 * Generate right bidiagonalizing vectors in WORK(IU)
2631 * (Workspace: need 2*M*M+4*M-1,
2632 * prefer 2*M*M+3*M+(M-1)*NB)
2633 *
2634  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2635  $ work( itaup ), work( iwork ),
2636  $ lwork-iwork+1, ierr )
2637 *
2638 * Generate left bidiagonalizing vectors in WORK(IR)
2639 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2640 *
2641  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2642  $ work( itauq ), work( iwork ),
2643  $ lwork-iwork+1, ierr )
2644  iwork = ie + m
2645 *
2646 * Perform bidiagonal QR iteration, computing left
2647 * singular vectors of L in WORK(IR) and computing
2648 * right singular vectors of L in WORK(IU)
2649 * (Workspace: need 2*M*M+BDSPAC)
2650 *
2651  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2652  $ work( iu ), ldwrku, work( ir ),
2653  $ ldwrkr, dum, 1, work( iwork ), info )
2654 *
2655 * Multiply right singular vectors of L in WORK(IU) by
2656 * Q in A, storing result in VT
2657 * (Workspace: need M*M)
2658 *
2659  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2660  $ ldwrku, a, lda, zero, vt, ldvt )
2661 *
2662 * Copy left singular vectors of L to A
2663 * (Workspace: need M*M)
2664 *
2665  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
2666  $ lda )
2667 *
2668  ELSE
2669 *
2670 * Insufficient workspace for a fast algorithm
2671 *
2672  itau = 1
2673  iwork = itau + m
2674 *
2675 * Compute A=L*Q, copying result to VT
2676 * (Workspace: need 2*M, prefer M+M*NB)
2677 *
2678  CALL sgelqf( m, n, a, lda, work( itau ),
2679  $ work( iwork ), lwork-iwork+1, ierr )
2680  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2681 *
2682 * Generate Q in VT
2683 * (Workspace: need 2*M, prefer M+M*NB)
2684 *
2685  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2686  $ work( iwork ), lwork-iwork+1, ierr )
2687  ie = itau
2688  itauq = ie + m
2689  itaup = itauq + m
2690  iwork = itaup + m
2691 *
2692 * Zero out above L in A
2693 *
2694  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2695  $ lda )
2696 *
2697 * Bidiagonalize L in A
2698 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2699 *
2700  CALL sgebrd( m, m, a, lda, s, work( ie ),
2701  $ work( itauq ), work( itaup ),
2702  $ work( iwork ), lwork-iwork+1, ierr )
2703 *
2704 * Multiply right vectors bidiagonalizing L by Q in VT
2705 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2706 *
2707  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
2708  $ work( itaup ), vt, ldvt,
2709  $ work( iwork ), lwork-iwork+1, ierr )
2710 *
2711 * Generate left bidiagonalizing vectors of L in A
2712 * (Workspace: need 4*M, prefer 3*M+M*NB)
2713 *
2714  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2715  $ work( iwork ), lwork-iwork+1, ierr )
2716  iwork = ie + m
2717 *
2718 * Perform bidiagonal QR iteration, compute left
2719 * singular vectors of A in A and compute right
2720 * singular vectors of A in VT
2721 * (Workspace: need BDSPAC)
2722 *
2723  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2724  $ ldvt, a, lda, dum, 1, work( iwork ),
2725  $ info )
2726 *
2727  END IF
2728 *
2729  ELSE IF( wntuas ) THEN
2730 *
2731 * Path 6t(N much larger than M, JOBU='S' or 'A',
2732 * JOBVT='S')
2733 * M right singular vectors to be computed in VT and
2734 * M left singular vectors to be computed in U
2735 *
2736  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2737 *
2738 * Sufficient workspace for a fast algorithm
2739 *
2740  iu = 1
2741  IF( lwork.GE.wrkbl+lda*m ) THEN
2742 *
2743 * WORK(IU) is LDA by N
2744 *
2745  ldwrku = lda
2746  ELSE
2747 *
2748 * WORK(IU) is LDA by M
2749 *
2750  ldwrku = m
2751  END IF
2752  itau = iu + ldwrku*m
2753  iwork = itau + m
2754 *
2755 * Compute A=L*Q
2756 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2757 *
2758  CALL sgelqf( m, n, a, lda, work( itau ),
2759  $ work( iwork ), lwork-iwork+1, ierr )
2760 *
2761 * Copy L to WORK(IU), zeroing out above it
2762 *
2763  CALL slacpy( 'L', m, m, a, lda, work( iu ),
2764  $ ldwrku )
2765  CALL slaset( 'U', m-1, m-1, zero, zero,
2766  $ work( iu+ldwrku ), ldwrku )
2767 *
2768 * Generate Q in A
2769 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2770 *
2771  CALL sorglq( m, n, m, a, lda, work( itau ),
2772  $ work( iwork ), lwork-iwork+1, ierr )
2773  ie = itau
2774  itauq = ie + m
2775  itaup = itauq + m
2776  iwork = itaup + m
2777 *
2778 * Bidiagonalize L in WORK(IU), copying result to U
2779 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2780 *
2781  CALL sgebrd( m, m, work( iu ), ldwrku, s,
2782  $ work( ie ), work( itauq ),
2783  $ work( itaup ), work( iwork ),
2784  $ lwork-iwork+1, ierr )
2785  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
2786  $ ldu )
2787 *
2788 * Generate right bidiagonalizing vectors in WORK(IU)
2789 * (Workspace: need M*M+4*M-1,
2790 * prefer M*M+3*M+(M-1)*NB)
2791 *
2792  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
2793  $ work( itaup ), work( iwork ),
2794  $ lwork-iwork+1, ierr )
2795 *
2796 * Generate left bidiagonalizing vectors in U
2797 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2798 *
2799  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2800  $ work( iwork ), lwork-iwork+1, ierr )
2801  iwork = ie + m
2802 *
2803 * Perform bidiagonal QR iteration, computing left
2804 * singular vectors of L in U and computing right
2805 * singular vectors of L in WORK(IU)
2806 * (Workspace: need M*M+BDSPAC)
2807 *
2808  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
2809  $ work( iu ), ldwrku, u, ldu, dum, 1,
2810  $ work( iwork ), info )
2811 *
2812 * Multiply right singular vectors of L in WORK(IU) by
2813 * Q in A, storing result in VT
2814 * (Workspace: need M*M)
2815 *
2816  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
2817  $ ldwrku, a, lda, zero, vt, ldvt )
2818 *
2819  ELSE
2820 *
2821 * Insufficient workspace for a fast algorithm
2822 *
2823  itau = 1
2824  iwork = itau + m
2825 *
2826 * Compute A=L*Q, copying result to VT
2827 * (Workspace: need 2*M, prefer M+M*NB)
2828 *
2829  CALL sgelqf( m, n, a, lda, work( itau ),
2830  $ work( iwork ), lwork-iwork+1, ierr )
2831  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2832 *
2833 * Generate Q in VT
2834 * (Workspace: need 2*M, prefer M+M*NB)
2835 *
2836  CALL sorglq( m, n, m, vt, ldvt, work( itau ),
2837  $ work( iwork ), lwork-iwork+1, ierr )
2838 *
2839 * Copy L to U, zeroing out above it
2840 *
2841  CALL slacpy( 'L', m, m, a, lda, u, ldu )
2842  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2843  $ ldu )
2844  ie = itau
2845  itauq = ie + m
2846  itaup = itauq + m
2847  iwork = itaup + m
2848 *
2849 * Bidiagonalize L in U
2850 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2851 *
2852  CALL sgebrd( m, m, u, ldu, s, work( ie ),
2853  $ work( itauq ), work( itaup ),
2854  $ work( iwork ), lwork-iwork+1, ierr )
2855 *
2856 * Multiply right bidiagonalizing vectors in U by Q
2857 * in VT
2858 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2859 *
2860  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2861  $ work( itaup ), vt, ldvt,
2862  $ work( iwork ), lwork-iwork+1, ierr )
2863 *
2864 * Generate left bidiagonalizing vectors in U
2865 * (Workspace: need 4*M, prefer 3*M+M*NB)
2866 *
2867  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2868  $ work( iwork ), lwork-iwork+1, ierr )
2869  iwork = ie + m
2870 *
2871 * Perform bidiagonal QR iteration, computing left
2872 * singular vectors of A in U and computing right
2873 * singular vectors of A in VT
2874 * (Workspace: need BDSPAC)
2875 *
2876  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2877  $ ldvt, u, ldu, dum, 1, work( iwork ),
2878  $ info )
2879 *
2880  END IF
2881 *
2882  END IF
2883 *
2884  ELSE IF( wntva ) THEN
2885 *
2886  IF( wntun ) THEN
2887 *
2888 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2889 * N right singular vectors to be computed in VT and
2890 * no left singular vectors to be computed
2891 *
2892  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2893 *
2894 * Sufficient workspace for a fast algorithm
2895 *
2896  ir = 1
2897  IF( lwork.GE.wrkbl+lda*m ) THEN
2898 *
2899 * WORK(IR) is LDA by M
2900 *
2901  ldwrkr = lda
2902  ELSE
2903 *
2904 * WORK(IR) is M by M
2905 *
2906  ldwrkr = m
2907  END IF
2908  itau = ir + ldwrkr*m
2909  iwork = itau + m
2910 *
2911 * Compute A=L*Q, copying result to VT
2912 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2913 *
2914  CALL sgelqf( m, n, a, lda, work( itau ),
2915  $ work( iwork ), lwork-iwork+1, ierr )
2916  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2917 *
2918 * Copy L to WORK(IR), zeroing out above it
2919 *
2920  CALL slacpy( 'L', m, m, a, lda, work( ir ),
2921  $ ldwrkr )
2922  CALL slaset( 'U', m-1, m-1, zero, zero,
2923  $ work( ir+ldwrkr ), ldwrkr )
2924 *
2925 * Generate Q in VT
2926 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2927 *
2928  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2929  $ work( iwork ), lwork-iwork+1, ierr )
2930  ie = itau
2931  itauq = ie + m
2932  itaup = itauq + m
2933  iwork = itaup + m
2934 *
2935 * Bidiagonalize L in WORK(IR)
2936 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2937 *
2938  CALL sgebrd( m, m, work( ir ), ldwrkr, s,
2939  $ work( ie ), work( itauq ),
2940  $ work( itaup ), work( iwork ),
2941  $ lwork-iwork+1, ierr )
2942 *
2943 * Generate right bidiagonalizing vectors in WORK(IR)
2944 * (Workspace: need M*M+4*M-1,
2945 * prefer M*M+3*M+(M-1)*NB)
2946 *
2947  CALL sorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2948  $ work( itaup ), work( iwork ),
2949  $ lwork-iwork+1, ierr )
2950  iwork = ie + m
2951 *
2952 * Perform bidiagonal QR iteration, computing right
2953 * singular vectors of L in WORK(IR)
2954 * (Workspace: need M*M+BDSPAC)
2955 *
2956  CALL sbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2957  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2958  $ work( iwork ), info )
2959 *
2960 * Multiply right singular vectors of L in WORK(IR) by
2961 * Q in VT, storing result in A
2962 * (Workspace: need M*M)
2963 *
2964  CALL sgemm( 'N', 'N', m, n, m, one, work( ir ),
2965  $ ldwrkr, vt, ldvt, zero, a, lda )
2966 *
2967 * Copy right singular vectors of A from A to VT
2968 *
2969  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
2970 *
2971  ELSE
2972 *
2973 * Insufficient workspace for a fast algorithm
2974 *
2975  itau = 1
2976  iwork = itau + m
2977 *
2978 * Compute A=L*Q, copying result to VT
2979 * (Workspace: need 2*M, prefer M+M*NB)
2980 *
2981  CALL sgelqf( m, n, a, lda, work( itau ),
2982  $ work( iwork ), lwork-iwork+1, ierr )
2983  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
2984 *
2985 * Generate Q in VT
2986 * (Workspace: need M+N, prefer M+N*NB)
2987 *
2988  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
2989  $ work( iwork ), lwork-iwork+1, ierr )
2990  ie = itau
2991  itauq = ie + m
2992  itaup = itauq + m
2993  iwork = itaup + m
2994 *
2995 * Zero out above L in A
2996 *
2997  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2998  $ lda )
2999 *
3000 * Bidiagonalize L in A
3001 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3002 *
3003  CALL sgebrd( m, m, a, lda, s, work( ie ),
3004  $ work( itauq ), work( itaup ),
3005  $ work( iwork ), lwork-iwork+1, ierr )
3006 *
3007 * Multiply right bidiagonalizing vectors in A by Q
3008 * in VT
3009 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3010 *
3011  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3012  $ work( itaup ), vt, ldvt,
3013  $ work( iwork ), lwork-iwork+1, ierr )
3014  iwork = ie + m
3015 *
3016 * Perform bidiagonal QR iteration, computing right
3017 * singular vectors of A in VT
3018 * (Workspace: need BDSPAC)
3019 *
3020  CALL sbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3021  $ ldvt, dum, 1, dum, 1, work( iwork ),
3022  $ info )
3023 *
3024  END IF
3025 *
3026  ELSE IF( wntuo ) THEN
3027 *
3028 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3029 * N right singular vectors to be computed in VT and
3030 * M left singular vectors to be overwritten on A
3031 *
3032  IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3033 *
3034 * Sufficient workspace for a fast algorithm
3035 *
3036  iu = 1
3037  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3038 *
3039 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3040 *
3041  ldwrku = lda
3042  ir = iu + ldwrku*m
3043  ldwrkr = lda
3044  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3045 *
3046 * WORK(IU) is LDA by M and WORK(IR) is M by M
3047 *
3048  ldwrku = lda
3049  ir = iu + ldwrku*m
3050  ldwrkr = m
3051  ELSE
3052 *
3053 * WORK(IU) is M by M and WORK(IR) is M by M
3054 *
3055  ldwrku = m
3056  ir = iu + ldwrku*m
3057  ldwrkr = m
3058  END IF
3059  itau = ir + ldwrkr*m
3060  iwork = itau + m
3061 *
3062 * Compute A=L*Q, copying result to VT
3063 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3064 *
3065  CALL sgelqf( m, n, a, lda, work( itau ),
3066  $ work( iwork ), lwork-iwork+1, ierr )
3067  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3068 *
3069 * Generate Q in VT
3070 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3071 *
3072  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3073  $ work( iwork ), lwork-iwork+1, ierr )
3074 *
3075 * Copy L to WORK(IU), zeroing out above it
3076 *
3077  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3078  $ ldwrku )
3079  CALL slaset( 'U', m-1, m-1, zero, zero,
3080  $ work( iu+ldwrku ), ldwrku )
3081  ie = itau
3082  itauq = ie + m
3083  itaup = itauq + m
3084  iwork = itaup + m
3085 *
3086 * Bidiagonalize L in WORK(IU), copying result to
3087 * WORK(IR)
3088 * (Workspace: need 2*M*M+4*M,
3089 * prefer 2*M*M+3*M+2*M*NB)
3090 *
3091  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3092  $ work( ie ), work( itauq ),
3093  $ work( itaup ), work( iwork ),
3094  $ lwork-iwork+1, ierr )
3095  CALL slacpy( 'L', m, m, work( iu ), ldwrku,
3096  $ work( ir ), ldwrkr )
3097 *
3098 * Generate right bidiagonalizing vectors in WORK(IU)
3099 * (Workspace: need 2*M*M+4*M-1,
3100 * prefer 2*M*M+3*M+(M-1)*NB)
3101 *
3102  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3103  $ work( itaup ), work( iwork ),
3104  $ lwork-iwork+1, ierr )
3105 *
3106 * Generate left bidiagonalizing vectors in WORK(IR)
3107 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3108 *
3109  CALL sorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3110  $ work( itauq ), work( iwork ),
3111  $ lwork-iwork+1, ierr )
3112  iwork = ie + m
3113 *
3114 * Perform bidiagonal QR iteration, computing left
3115 * singular vectors of L in WORK(IR) and computing
3116 * right singular vectors of L in WORK(IU)
3117 * (Workspace: need 2*M*M+BDSPAC)
3118 *
3119  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3120  $ work( iu ), ldwrku, work( ir ),
3121  $ ldwrkr, dum, 1, work( iwork ), info )
3122 *
3123 * Multiply right singular vectors of L in WORK(IU) by
3124 * Q in VT, storing result in A
3125 * (Workspace: need M*M)
3126 *
3127  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3128  $ ldwrku, vt, ldvt, zero, a, lda )
3129 *
3130 * Copy right singular vectors of A from A to VT
3131 *
3132  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3133 *
3134 * Copy left singular vectors of A from WORK(IR) to A
3135 *
3136  CALL slacpy( 'F', m, m, work( ir ), ldwrkr, a,
3137  $ lda )
3138 *
3139  ELSE
3140 *
3141 * Insufficient workspace for a fast algorithm
3142 *
3143  itau = 1
3144  iwork = itau + m
3145 *
3146 * Compute A=L*Q, copying result to VT
3147 * (Workspace: need 2*M, prefer M+M*NB)
3148 *
3149  CALL sgelqf( m, n, a, lda, work( itau ),
3150  $ work( iwork ), lwork-iwork+1, ierr )
3151  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3152 *
3153 * Generate Q in VT
3154 * (Workspace: need M+N, prefer M+N*NB)
3155 *
3156  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3157  $ work( iwork ), lwork-iwork+1, ierr )
3158  ie = itau
3159  itauq = ie + m
3160  itaup = itauq + m
3161  iwork = itaup + m
3162 *
3163 * Zero out above L in A
3164 *
3165  CALL slaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3166  $ lda )
3167 *
3168 * Bidiagonalize L in A
3169 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3170 *
3171  CALL sgebrd( m, m, a, lda, s, work( ie ),
3172  $ work( itauq ), work( itaup ),
3173  $ work( iwork ), lwork-iwork+1, ierr )
3174 *
3175 * Multiply right bidiagonalizing vectors in A by Q
3176 * in VT
3177 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3178 *
3179  CALL sormbr( 'P', 'L', 'T', m, n, m, a, lda,
3180  $ work( itaup ), vt, ldvt,
3181  $ work( iwork ), lwork-iwork+1, ierr )
3182 *
3183 * Generate left bidiagonalizing vectors in A
3184 * (Workspace: need 4*M, prefer 3*M+M*NB)
3185 *
3186  CALL sorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3187  $ work( iwork ), lwork-iwork+1, ierr )
3188  iwork = ie + m
3189 *
3190 * Perform bidiagonal QR iteration, computing left
3191 * singular vectors of A in A and computing right
3192 * singular vectors of A in VT
3193 * (Workspace: need BDSPAC)
3194 *
3195  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3196  $ ldvt, a, lda, dum, 1, work( iwork ),
3197  $ info )
3198 *
3199  END IF
3200 *
3201  ELSE IF( wntuas ) THEN
3202 *
3203 * Path 9t(N much larger than M, JOBU='S' or 'A',
3204 * JOBVT='A')
3205 * N right singular vectors to be computed in VT and
3206 * M left singular vectors to be computed in U
3207 *
3208  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3209 *
3210 * Sufficient workspace for a fast algorithm
3211 *
3212  iu = 1
3213  IF( lwork.GE.wrkbl+lda*m ) THEN
3214 *
3215 * WORK(IU) is LDA by M
3216 *
3217  ldwrku = lda
3218  ELSE
3219 *
3220 * WORK(IU) is M by M
3221 *
3222  ldwrku = m
3223  END IF
3224  itau = iu + ldwrku*m
3225  iwork = itau + m
3226 *
3227 * Compute A=L*Q, copying result to VT
3228 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3229 *
3230  CALL sgelqf( m, n, a, lda, work( itau ),
3231  $ work( iwork ), lwork-iwork+1, ierr )
3232  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3233 *
3234 * Generate Q in VT
3235 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3236 *
3237  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3238  $ work( iwork ), lwork-iwork+1, ierr )
3239 *
3240 * Copy L to WORK(IU), zeroing out above it
3241 *
3242  CALL slacpy( 'L', m, m, a, lda, work( iu ),
3243  $ ldwrku )
3244  CALL slaset( 'U', m-1, m-1, zero, zero,
3245  $ work( iu+ldwrku ), ldwrku )
3246  ie = itau
3247  itauq = ie + m
3248  itaup = itauq + m
3249  iwork = itaup + m
3250 *
3251 * Bidiagonalize L in WORK(IU), copying result to U
3252 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3253 *
3254  CALL sgebrd( m, m, work( iu ), ldwrku, s,
3255  $ work( ie ), work( itauq ),
3256  $ work( itaup ), work( iwork ),
3257  $ lwork-iwork+1, ierr )
3258  CALL slacpy( 'L', m, m, work( iu ), ldwrku, u,
3259  $ ldu )
3260 *
3261 * Generate right bidiagonalizing vectors in WORK(IU)
3262 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3263 *
3264  CALL sorgbr( 'P', m, m, m, work( iu ), ldwrku,
3265  $ work( itaup ), work( iwork ),
3266  $ lwork-iwork+1, ierr )
3267 *
3268 * Generate left bidiagonalizing vectors in U
3269 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3270 *
3271  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3272  $ work( iwork ), lwork-iwork+1, ierr )
3273  iwork = ie + m
3274 *
3275 * Perform bidiagonal QR iteration, computing left
3276 * singular vectors of L in U and computing right
3277 * singular vectors of L in WORK(IU)
3278 * (Workspace: need M*M+BDSPAC)
3279 *
3280  CALL sbdsqr( 'U', m, m, m, 0, s, work( ie ),
3281  $ work( iu ), ldwrku, u, ldu, dum, 1,
3282  $ work( iwork ), info )
3283 *
3284 * Multiply right singular vectors of L in WORK(IU) by
3285 * Q in VT, storing result in A
3286 * (Workspace: need M*M)
3287 *
3288  CALL sgemm( 'N', 'N', m, n, m, one, work( iu ),
3289  $ ldwrku, vt, ldvt, zero, a, lda )
3290 *
3291 * Copy right singular vectors of A from A to VT
3292 *
3293  CALL slacpy( 'F', m, n, a, lda, vt, ldvt )
3294 *
3295  ELSE
3296 *
3297 * Insufficient workspace for a fast algorithm
3298 *
3299  itau = 1
3300  iwork = itau + m
3301 *
3302 * Compute A=L*Q, copying result to VT
3303 * (Workspace: need 2*M, prefer M+M*NB)
3304 *
3305  CALL sgelqf( m, n, a, lda, work( itau ),
3306  $ work( iwork ), lwork-iwork+1, ierr )
3307  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3308 *
3309 * Generate Q in VT
3310 * (Workspace: need M+N, prefer M+N*NB)
3311 *
3312  CALL sorglq( n, n, m, vt, ldvt, work( itau ),
3313  $ work( iwork ), lwork-iwork+1, ierr )
3314 *
3315 * Copy L to U, zeroing out above it
3316 *
3317  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3318  CALL slaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3319  $ ldu )
3320  ie = itau
3321  itauq = ie + m
3322  itaup = itauq + m
3323  iwork = itaup + m
3324 *
3325 * Bidiagonalize L in U
3326 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3327 *
3328  CALL sgebrd( m, m, u, ldu, s, work( ie ),
3329  $ work( itauq ), work( itaup ),
3330  $ work( iwork ), lwork-iwork+1, ierr )
3331 *
3332 * Multiply right bidiagonalizing vectors in U by Q
3333 * in VT
3334 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3335 *
3336  CALL sormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3337  $ work( itaup ), vt, ldvt,
3338  $ work( iwork ), lwork-iwork+1, ierr )
3339 *
3340 * Generate left bidiagonalizing vectors in U
3341 * (Workspace: need 4*M, prefer 3*M+M*NB)
3342 *
3343  CALL sorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3344  $ work( iwork ), lwork-iwork+1, ierr )
3345  iwork = ie + m
3346 *
3347 * Perform bidiagonal QR iteration, computing left
3348 * singular vectors of A in U and computing right
3349 * singular vectors of A in VT
3350 * (Workspace: need BDSPAC)
3351 *
3352  CALL sbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3353  $ ldvt, u, ldu, dum, 1, work( iwork ),
3354  $ info )
3355 *
3356  END IF
3357 *
3358  END IF
3359 *
3360  END IF
3361 *
3362  ELSE
3363 *
3364 * N .LT. MNTHR
3365 *
3366 * Path 10t(N greater than M, but not much larger)
3367 * Reduce to bidiagonal form without LQ decomposition
3368 *
3369  ie = 1
3370  itauq = ie + m
3371  itaup = itauq + m
3372  iwork = itaup + m
3373 *
3374 * Bidiagonalize A
3375 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3376 *
3377  CALL sgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3378  $ work( itaup ), work( iwork ), lwork-iwork+1,
3379  $ ierr )
3380  IF( wntuas ) THEN
3381 *
3382 * If left singular vectors desired in U, copy result to U
3383 * and generate left bidiagonalizing vectors in U
3384 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3385 *
3386  CALL slacpy( 'L', m, m, a, lda, u, ldu )
3387  CALL sorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3388  $ work( iwork ), lwork-iwork+1, ierr )
3389  END IF
3390  IF( wntvas ) THEN
3391 *
3392 * If right singular vectors desired in VT, copy result to
3393 * VT and generate right bidiagonalizing vectors in VT
3394 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3395 *
3396  CALL slacpy( 'U', m, n, a, lda, vt, ldvt )
3397  IF( wntva )
3398  $ nrvt = n
3399  IF( wntvs )
3400  $ nrvt = m
3401  CALL sorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3402  $ work( iwork ), lwork-iwork+1, ierr )
3403  END IF
3404  IF( wntuo ) THEN
3405 *
3406 * If left singular vectors desired in A, generate left
3407 * bidiagonalizing vectors in A
3408 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3409 *
3410  CALL sorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3411  $ work( iwork ), lwork-iwork+1, ierr )
3412  END IF
3413  IF( wntvo ) THEN
3414 *
3415 * If right singular vectors desired in A, generate right
3416 * bidiagonalizing vectors in A
3417 * (Workspace: need 4*M, prefer 3*M+M*NB)
3418 *
3419  CALL sorgbr( 'P', m, n, m, a, lda, work( itaup ),
3420  $ work( iwork ), lwork-iwork+1, ierr )
3421  END IF
3422  iwork = ie + m
3423  IF( wntuas .OR. wntuo )
3424  $ nru = m
3425  IF( wntun )
3426  $ nru = 0
3427  IF( wntvas .OR. wntvo )
3428  $ ncvt = n
3429  IF( wntvn )
3430  $ ncvt = 0
3431  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3432 *
3433 * Perform bidiagonal QR iteration, if desired, computing
3434 * left singular vectors in U and computing right singular
3435 * vectors in VT
3436 * (Workspace: need BDSPAC)
3437 *
3438  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3439  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3440  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3441 *
3442 * Perform bidiagonal QR iteration, if desired, computing
3443 * left singular vectors in U and computing right singular
3444 * vectors in A
3445 * (Workspace: need BDSPAC)
3446 *
3447  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3448  $ u, ldu, dum, 1, work( iwork ), info )
3449  ELSE
3450 *
3451 * Perform bidiagonal QR iteration, if desired, computing
3452 * left singular vectors in A and computing right singular
3453 * vectors in VT
3454 * (Workspace: need BDSPAC)
3455 *
3456  CALL sbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3457  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3458  END IF
3459 *
3460  END IF
3461 *
3462  END IF
3463 *
3464 * If SBDSQR failed to converge, copy unconverged superdiagonals
3465 * to WORK( 2:MINMN )
3466 *
3467  IF( info.NE.0 ) THEN
3468  IF( ie.GT.2 ) THEN
3469  DO 50 i = 1, minmn - 1
3470  work( i+1 ) = work( i+ie-1 )
3471  50 CONTINUE
3472  END IF
3473  IF( ie.LT.2 ) THEN
3474  DO 60 i = minmn - 1, 1, -1
3475  work( i+1 ) = work( i+ie-1 )
3476  60 CONTINUE
3477  END IF
3478  END IF
3479 *
3480 * Undo scaling if necessary
3481 *
3482  IF( iscl.EQ.1 ) THEN
3483  IF( anrm.GT.bignum )
3484  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3485  $ ierr )
3486  IF( info.NE.0 .AND. anrm.GT.bignum )
3487  $ CALL slascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3488  $ minmn, ierr )
3489  IF( anrm.LT.smlnum )
3490  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3491  $ ierr )
3492  IF( info.NE.0 .AND. anrm.LT.smlnum )
3493  $ CALL slascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3494  $ minmn, ierr )
3495  END IF
3496 *
3497 * Return optimal workspace in WORK(1)
3498 *
3499  work( 1 ) = maxwrk
3500 *
3501  RETURN
3502 *
3503 * End of SGESVD
3504 *
3505  END
subroutine sorglq(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGLQ
Definition: sorglq.f:129
subroutine slascl(TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition: slascl.f:145
subroutine sgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
SGEMM
Definition: sgemm.f:189
subroutine sgeqrf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGEQRF
Definition: sgeqrf.f:138
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:62
subroutine slacpy(UPLO, M, N, A, LDA, B, LDB)
SLACPY copies all or part of one two-dimensional array to another.
Definition: slacpy.f:105
subroutine slaset(UPLO, M, N, ALPHA, BETA, A, LDA)
SLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: slaset.f:112
subroutine sormbr(VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK, LWORK, INFO)
SORMBR
Definition: sormbr.f:198
subroutine sbdsqr(UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C, LDC, WORK, INFO)
SBDSQR
Definition: sbdsqr.f:232
subroutine sgebrd(M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK, INFO)
SGEBRD
Definition: sgebrd.f:207
subroutine sgesvd(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
SGESVD computes the singular value decomposition (SVD) for GE matrices
Definition: sgesvd.f:213
subroutine sorgqr(M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGQR
Definition: sorgqr.f:130
subroutine sorgbr(VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO)
SORGBR
Definition: sorgbr.f:159
subroutine sgelqf(M, N, A, LDA, TAU, WORK, LWORK, INFO)
SGELQF
Definition: sgelqf.f:137