LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
dgesvd.f
Go to the documentation of this file.
1 *> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
30 * $ VT( LDVT, * ), WORK( * )
31 * ..
32 *
33 *
34 *> \par Purpose:
35 * =============
36 *>
37 *> \verbatim
38 *>
39 *> DGESVD computes the singular value decomposition (SVD) of a real
40 *> M-by-N matrix A, optionally computing the left and/or right singular
41 *> vectors. The SVD is written
42 *>
43 *> A = U * SIGMA * transpose(V)
44 *>
45 *> where SIGMA is an M-by-N matrix which is zero except for its
46 *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
47 *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
48 *> are the singular values of A; they are real and non-negative, and
49 *> are returned in descending order. The first min(m,n) columns of
50 *> U and V are the left and right singular vectors of A.
51 *>
52 *> Note that the routine returns V**T, not V.
53 *> \endverbatim
54 *
55 * Arguments:
56 * ==========
57 *
58 *> \param[in] JOBU
59 *> \verbatim
60 *> JOBU is CHARACTER*1
61 *> Specifies options for computing all or part of the matrix U:
62 *> = 'A': all M columns of U are returned in array U:
63 *> = 'S': the first min(m,n) columns of U (the left singular
64 *> vectors) are returned in the array U;
65 *> = 'O': the first min(m,n) columns of U (the left singular
66 *> vectors) are overwritten on the array A;
67 *> = 'N': no columns of U (no left singular vectors) are
68 *> computed.
69 *> \endverbatim
70 *>
71 *> \param[in] JOBVT
72 *> \verbatim
73 *> JOBVT is CHARACTER*1
74 *> Specifies options for computing all or part of the matrix
75 *> V**T:
76 *> = 'A': all N rows of V**T are returned in the array VT;
77 *> = 'S': the first min(m,n) rows of V**T (the right singular
78 *> vectors) are returned in the array VT;
79 *> = 'O': the first min(m,n) rows of V**T (the right singular
80 *> vectors) are overwritten on the array A;
81 *> = 'N': no rows of V**T (no right singular vectors) are
82 *> computed.
83 *>
84 *> JOBVT and JOBU cannot both be 'O'.
85 *> \endverbatim
86 *>
87 *> \param[in] M
88 *> \verbatim
89 *> M is INTEGER
90 *> The number of rows of the input matrix A. M >= 0.
91 *> \endverbatim
92 *>
93 *> \param[in] N
94 *> \verbatim
95 *> N is INTEGER
96 *> The number of columns of the input matrix A. N >= 0.
97 *> \endverbatim
98 *>
99 *> \param[in,out] A
100 *> \verbatim
101 *> A is DOUBLE PRECISION array, dimension (LDA,N)
102 *> On entry, the M-by-N matrix A.
103 *> On exit,
104 *> if JOBU = 'O', A is overwritten with the first min(m,n)
105 *> columns of U (the left singular vectors,
106 *> stored columnwise);
107 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
108 *> rows of V**T (the right singular vectors,
109 *> stored rowwise);
110 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
111 *> are destroyed.
112 *> \endverbatim
113 *>
114 *> \param[in] LDA
115 *> \verbatim
116 *> LDA is INTEGER
117 *> The leading dimension of the array A. LDA >= max(1,M).
118 *> \endverbatim
119 *>
120 *> \param[out] S
121 *> \verbatim
122 *> S is DOUBLE PRECISION array, dimension (min(M,N))
123 *> The singular values of A, sorted so that S(i) >= S(i+1).
124 *> \endverbatim
125 *>
126 *> \param[out] U
127 *> \verbatim
128 *> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
129 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
130 *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
131 *> if JOBU = 'S', U contains the first min(m,n) columns of U
132 *> (the left singular vectors, stored columnwise);
133 *> if JOBU = 'N' or 'O', U is not referenced.
134 *> \endverbatim
135 *>
136 *> \param[in] LDU
137 *> \verbatim
138 *> LDU is INTEGER
139 *> The leading dimension of the array U. LDU >= 1; if
140 *> JOBU = 'S' or 'A', LDU >= M.
141 *> \endverbatim
142 *>
143 *> \param[out] VT
144 *> \verbatim
145 *> VT is DOUBLE PRECISION array, dimension (LDVT,N)
146 *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
147 *> V**T;
148 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
149 *> V**T (the right singular vectors, stored rowwise);
150 *> if JOBVT = 'N' or 'O', VT is not referenced.
151 *> \endverbatim
152 *>
153 *> \param[in] LDVT
154 *> \verbatim
155 *> LDVT is INTEGER
156 *> The leading dimension of the array VT. LDVT >= 1; if
157 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
158 *> \endverbatim
159 *>
160 *> \param[out] WORK
161 *> \verbatim
162 *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
163 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
164 *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
165 *> superdiagonal elements of an upper bidiagonal matrix B
166 *> whose diagonal is in S (not necessarily sorted). B
167 *> satisfies A = U * B * VT, so it has the same singular values
168 *> as A, and singular vectors related by U and VT.
169 *> \endverbatim
170 *>
171 *> \param[in] LWORK
172 *> \verbatim
173 *> LWORK is INTEGER
174 *> The dimension of the array WORK.
175 *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
176 *> - PATH 1 (M much larger than N, JOBU='N')
177 *> - PATH 1t (N much larger than M, JOBVT='N')
178 *> LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
179 *> For good performance, LWORK should generally be larger.
180 *>
181 *> If LWORK = -1, then a workspace query is assumed; the routine
182 *> only calculates the optimal size of the WORK array, returns
183 *> this value as the first entry of the WORK array, and no error
184 *> message related to LWORK is issued by XERBLA.
185 *> \endverbatim
186 *>
187 *> \param[out] INFO
188 *> \verbatim
189 *> INFO is INTEGER
190 *> = 0: successful exit.
191 *> < 0: if INFO = -i, the i-th argument had an illegal value.
192 *> > 0: if DBDSQR did not converge, INFO specifies how many
193 *> superdiagonals of an intermediate bidiagonal form B
194 *> did not converge to zero. See the description of WORK
195 *> above for details.
196 *> \endverbatim
197 *
198 * Authors:
199 * ========
200 *
201 *> \author Univ. of Tennessee
202 *> \author Univ. of California Berkeley
203 *> \author Univ. of Colorado Denver
204 *> \author NAG Ltd.
205 *
206 *> \date April 2012
207 *
208 *> \ingroup doubleGEsing
209 *
210 * =====================================================================
211  SUBROUTINE dgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
212  $ vt, ldvt, work, lwork, info )
213 *
214 * -- LAPACK driver routine (version 3.4.1) --
215 * -- LAPACK is a software package provided by Univ. of Tennessee, --
216 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
217 * April 2012
218 *
219 * .. Scalar Arguments ..
220  CHARACTER jobu, jobvt
221  INTEGER info, lda, ldu, ldvt, lwork, m, n
222 * ..
223 * .. Array Arguments ..
224  DOUBLE PRECISION a( lda, * ), s( * ), u( ldu, * ),
225  $ vt( ldvt, * ), work( * )
226 * ..
227 *
228 * =====================================================================
229 *
230 * .. Parameters ..
231  DOUBLE PRECISION zero, one
232  parameter( zero = 0.0d0, one = 1.0d0 )
233 * ..
234 * .. Local Scalars ..
235  LOGICAL lquery, wntua, wntuas, wntun, wntuo, wntus,
236  $ wntva, wntvas, wntvn, wntvo, wntvs
237  INTEGER bdspac, blk, chunk, i, ie, ierr, ir, iscl,
238  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
239  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
240  $ nrvt, wrkbl
241  INTEGER lwork_dgeqrf, lwork_dorgqr_n, lwork_dorgqr_m,
242  $ lwork_dgebrd, lwork_dorgbr_p, lwork_dorgbr_q,
243  $ lwork_dgelqf, lwork_dorglq_n, lwork_dorglq_m
244  DOUBLE PRECISION anrm, bignum, eps, smlnum
245 * ..
246 * .. Local Arrays ..
247  DOUBLE PRECISION dum( 1 )
248 * ..
249 * .. External Subroutines ..
250  EXTERNAL dbdsqr, dgebrd, dgelqf, dgemm, dgeqrf, dlacpy,
252  $ xerbla
253 * ..
254 * .. External Functions ..
255  LOGICAL lsame
256  INTEGER ilaenv
257  DOUBLE PRECISION dlamch, dlange
258  EXTERNAL lsame, ilaenv, dlamch, dlange
259 * ..
260 * .. Intrinsic Functions ..
261  INTRINSIC max, min, sqrt
262 * ..
263 * .. Executable Statements ..
264 *
265 * Test the input arguments
266 *
267  info = 0
268  minmn = min( m, n )
269  wntua = lsame( jobu, 'A' )
270  wntus = lsame( jobu, 'S' )
271  wntuas = wntua .OR. wntus
272  wntuo = lsame( jobu, 'O' )
273  wntun = lsame( jobu, 'N' )
274  wntva = lsame( jobvt, 'A' )
275  wntvs = lsame( jobvt, 'S' )
276  wntvas = wntva .OR. wntvs
277  wntvo = lsame( jobvt, 'O' )
278  wntvn = lsame( jobvt, 'N' )
279  lquery = ( lwork.EQ.-1 )
280 *
281  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
282  info = -1
283  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
284  $ ( wntvo .AND. wntuo ) ) THEN
285  info = -2
286  ELSE IF( m.LT.0 ) THEN
287  info = -3
288  ELSE IF( n.LT.0 ) THEN
289  info = -4
290  ELSE IF( lda.LT.max( 1, m ) ) THEN
291  info = -6
292  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
293  info = -9
294  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
295  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
296  info = -11
297  END IF
298 *
299 * Compute workspace
300 * (Note: Comments in the code beginning "Workspace:" describe the
301 * minimal amount of workspace needed at that point in the code,
302 * as well as the preferred amount for good performance.
303 * NB refers to the optimal block size for the immediately
304 * following subroutine, as returned by ILAENV.)
305 *
306  IF( info.EQ.0 ) THEN
307  minwrk = 1
308  maxwrk = 1
309  IF( m.GE.n .AND. minmn.GT.0 ) THEN
310 *
311 * Compute space needed for DBDSQR
312 *
313  mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
314  bdspac = 5*n
315 * Compute space needed for DGEQRF
316  CALL dgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
317  lwork_dgeqrf=dum(1)
318 * Compute space needed for DORGQR
319  CALL dorgqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
320  lwork_dorgqr_n=dum(1)
321  CALL dorgqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
322  lwork_dorgqr_m=dum(1)
323 * Compute space needed for DGEBRD
324  CALL dgebrd( n, n, a, lda, s, dum(1), dum(1),
325  $ dum(1), dum(1), -1, ierr )
326  lwork_dgebrd=dum(1)
327 * Compute space needed for DORGBR P
328  CALL dorgbr( 'P', n, n, n, a, lda, dum(1),
329  $ dum(1), -1, ierr )
330  lwork_dorgbr_p=dum(1)
331 * Compute space needed for DORGBR Q
332  CALL dorgbr( 'Q', n, n, n, a, lda, dum(1),
333  $ dum(1), -1, ierr )
334  lwork_dorgbr_q=dum(1)
335 *
336  IF( m.GE.mnthr ) THEN
337  IF( wntun ) THEN
338 *
339 * Path 1 (M much larger than N, JOBU='N')
340 *
341  maxwrk = n + lwork_dgeqrf
342  maxwrk = max( maxwrk, 3*n+lwork_dgebrd )
343  IF( wntvo .OR. wntvas )
344  $ maxwrk = max( maxwrk, 3*n+lwork_dorgbr_p )
345  maxwrk = max( maxwrk, bdspac )
346  minwrk = max( 4*n, bdspac )
347  ELSE IF( wntuo .AND. wntvn ) THEN
348 *
349 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
350 *
351  wrkbl = n + lwork_dgeqrf
352  wrkbl = max( wrkbl, n+lwork_dorgqr_n )
353  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
354  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
355  wrkbl = max( wrkbl, bdspac )
356  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
357  minwrk = max( 3*n+m, bdspac )
358  ELSE IF( wntuo .AND. wntvas ) THEN
359 *
360 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
361 * 'A')
362 *
363  wrkbl = n + lwork_dgeqrf
364  wrkbl = max( wrkbl, n+lwork_dorgqr_n )
365  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
366  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
367  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_p )
368  wrkbl = max( wrkbl, bdspac )
369  maxwrk = max( n*n+wrkbl, n*n+m*n+n )
370  minwrk = max( 3*n+m, bdspac )
371  ELSE IF( wntus .AND. wntvn ) THEN
372 *
373 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
374 *
375  wrkbl = n + lwork_dgeqrf
376  wrkbl = max( wrkbl, n+lwork_dorgqr_n )
377  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
378  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
379  wrkbl = max( wrkbl, bdspac )
380  maxwrk = n*n + wrkbl
381  minwrk = max( 3*n+m, bdspac )
382  ELSE IF( wntus .AND. wntvo ) THEN
383 *
384 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
385 *
386  wrkbl = n + lwork_dgeqrf
387  wrkbl = max( wrkbl, n+lwork_dorgqr_n )
388  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
389  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
390  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_p )
391  wrkbl = max( wrkbl, bdspac )
392  maxwrk = 2*n*n + wrkbl
393  minwrk = max( 3*n+m, bdspac )
394  ELSE IF( wntus .AND. wntvas ) THEN
395 *
396 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
397 * 'A')
398 *
399  wrkbl = n + lwork_dgeqrf
400  wrkbl = max( wrkbl, n+lwork_dorgqr_n )
401  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
402  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
403  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_p )
404  wrkbl = max( wrkbl, bdspac )
405  maxwrk = n*n + wrkbl
406  minwrk = max( 3*n+m, bdspac )
407  ELSE IF( wntua .AND. wntvn ) THEN
408 *
409 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
410 *
411  wrkbl = n + lwork_dgeqrf
412  wrkbl = max( wrkbl, n+lwork_dorgqr_m )
413  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
414  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
415  wrkbl = max( wrkbl, bdspac )
416  maxwrk = n*n + wrkbl
417  minwrk = max( 3*n+m, bdspac )
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_dgeqrf
423  wrkbl = max( wrkbl, n+lwork_dorgqr_m )
424  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
425  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
426  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_p )
427  wrkbl = max( wrkbl, bdspac )
428  maxwrk = 2*n*n + wrkbl
429  minwrk = max( 3*n+m, bdspac )
430  ELSE IF( wntua .AND. wntvas ) THEN
431 *
432 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
433 * 'A')
434 *
435  wrkbl = n + lwork_dgeqrf
436  wrkbl = max( wrkbl, n+lwork_dorgqr_m )
437  wrkbl = max( wrkbl, 3*n+lwork_dgebrd )
438  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_q )
439  wrkbl = max( wrkbl, 3*n+lwork_dorgbr_p )
440  wrkbl = max( wrkbl, bdspac )
441  maxwrk = n*n + wrkbl
442  minwrk = max( 3*n+m, bdspac )
443  END IF
444  ELSE
445 *
446 * Path 10 (M at least N, but not much larger)
447 *
448  CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
449  $ dum(1), dum(1), -1, ierr )
450  lwork_dgebrd=dum(1)
451  maxwrk = 3*n + lwork_dgebrd
452  IF( wntus .OR. wntuo ) THEN
453  CALL dorgbr( 'Q', m, n, n, a, lda, dum(1),
454  $ dum(1), -1, ierr )
455  lwork_dorgbr_q=dum(1)
456  maxwrk = max( maxwrk, 3*n+lwork_dorgbr_q )
457  END IF
458  IF( wntua ) THEN
459  CALL dorgbr( 'Q', m, m, n, a, lda, dum(1),
460  $ dum(1), -1, ierr )
461  lwork_dorgbr_q=dum(1)
462  maxwrk = max( maxwrk, 3*n+lwork_dorgbr_q )
463  END IF
464  IF( .NOT.wntvn ) THEN
465  maxwrk = max( maxwrk, 3*n+lwork_dorgbr_p )
466  END IF
467  maxwrk = max( maxwrk, bdspac )
468  minwrk = max( 3*n+m, bdspac )
469  END IF
470  ELSE IF( minmn.GT.0 ) THEN
471 *
472 * Compute space needed for DBDSQR
473 *
474  mnthr = ilaenv( 6, 'DGESVD', jobu // jobvt, m, n, 0, 0 )
475  bdspac = 5*m
476 * Compute space needed for DGELQF
477  CALL dgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
478  lwork_dgelqf=dum(1)
479 * Compute space needed for DORGLQ
480  CALL dorglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
481  lwork_dorglq_n=dum(1)
482  CALL dorglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
483  lwork_dorglq_m=dum(1)
484 * Compute space needed for DGEBRD
485  CALL dgebrd( m, m, a, lda, s, dum(1), dum(1),
486  $ dum(1), dum(1), -1, ierr )
487  lwork_dgebrd=dum(1)
488 * Compute space needed for DORGBR P
489  CALL dorgbr( 'P', m, m, m, a, n, dum(1),
490  $ dum(1), -1, ierr )
491  lwork_dorgbr_p=dum(1)
492 * Compute space needed for DORGBR Q
493  CALL dorgbr( 'Q', m, m, m, a, n, dum(1),
494  $ dum(1), -1, ierr )
495  lwork_dorgbr_q=dum(1)
496  IF( n.GE.mnthr ) THEN
497  IF( wntvn ) THEN
498 *
499 * Path 1t(N much larger than M, JOBVT='N')
500 *
501  maxwrk = m + lwork_dgelqf
502  maxwrk = max( maxwrk, 3*m+lwork_dgebrd )
503  IF( wntuo .OR. wntuas )
504  $ maxwrk = max( maxwrk, 3*m+lwork_dorgbr_q )
505  maxwrk = max( maxwrk, bdspac )
506  minwrk = max( 4*m, bdspac )
507  ELSE IF( wntvo .AND. wntun ) THEN
508 *
509 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
510 *
511  wrkbl = m + lwork_dgelqf
512  wrkbl = max( wrkbl, m+lwork_dorglq_m )
513  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
514  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
515  wrkbl = max( wrkbl, bdspac )
516  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
517  minwrk = max( 3*m+n, bdspac )
518  ELSE IF( wntvo .AND. wntuas ) THEN
519 *
520 * Path 3t(N much larger than M, JOBU='S' or 'A',
521 * JOBVT='O')
522 *
523  wrkbl = m + lwork_dgelqf
524  wrkbl = max( wrkbl, m+lwork_dorglq_m )
525  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
526  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
527  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_q )
528  wrkbl = max( wrkbl, bdspac )
529  maxwrk = max( m*m+wrkbl, m*m+m*n+m )
530  minwrk = max( 3*m+n, bdspac )
531  ELSE IF( wntvs .AND. wntun ) THEN
532 *
533 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
534 *
535  wrkbl = m + lwork_dgelqf
536  wrkbl = max( wrkbl, m+lwork_dorglq_m )
537  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
538  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
539  wrkbl = max( wrkbl, bdspac )
540  maxwrk = m*m + wrkbl
541  minwrk = max( 3*m+n, bdspac )
542  ELSE IF( wntvs .AND. wntuo ) THEN
543 *
544 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
545 *
546  wrkbl = m + lwork_dgelqf
547  wrkbl = max( wrkbl, m+lwork_dorglq_m )
548  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
549  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
550  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_q )
551  wrkbl = max( wrkbl, bdspac )
552  maxwrk = 2*m*m + wrkbl
553  minwrk = max( 3*m+n, bdspac )
554  ELSE IF( wntvs .AND. wntuas ) THEN
555 *
556 * Path 6t(N much larger than M, JOBU='S' or 'A',
557 * JOBVT='S')
558 *
559  wrkbl = m + lwork_dgelqf
560  wrkbl = max( wrkbl, m+lwork_dorglq_m )
561  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
562  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
563  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_q )
564  wrkbl = max( wrkbl, bdspac )
565  maxwrk = m*m + wrkbl
566  minwrk = max( 3*m+n, bdspac )
567  ELSE IF( wntva .AND. wntun ) THEN
568 *
569 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
570 *
571  wrkbl = m + lwork_dgelqf
572  wrkbl = max( wrkbl, m+lwork_dorglq_n )
573  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
574  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
575  wrkbl = max( wrkbl, bdspac )
576  maxwrk = m*m + wrkbl
577  minwrk = max( 3*m+n, bdspac )
578  ELSE IF( wntva .AND. wntuo ) THEN
579 *
580 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
581 *
582  wrkbl = m + lwork_dgelqf
583  wrkbl = max( wrkbl, m+lwork_dorglq_n )
584  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
585  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
586  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_q )
587  wrkbl = max( wrkbl, bdspac )
588  maxwrk = 2*m*m + wrkbl
589  minwrk = max( 3*m+n, bdspac )
590  ELSE IF( wntva .AND. wntuas ) THEN
591 *
592 * Path 9t(N much larger than M, JOBU='S' or 'A',
593 * JOBVT='A')
594 *
595  wrkbl = m + lwork_dgelqf
596  wrkbl = max( wrkbl, m+lwork_dorglq_n )
597  wrkbl = max( wrkbl, 3*m+lwork_dgebrd )
598  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_p )
599  wrkbl = max( wrkbl, 3*m+lwork_dorgbr_q )
600  wrkbl = max( wrkbl, bdspac )
601  maxwrk = m*m + wrkbl
602  minwrk = max( 3*m+n, bdspac )
603  END IF
604  ELSE
605 *
606 * Path 10t(N greater than M, but not much larger)
607 *
608  CALL dgebrd( m, n, a, lda, s, dum(1), dum(1),
609  $ dum(1), dum(1), -1, ierr )
610  lwork_dgebrd=dum(1)
611  maxwrk = 3*m + lwork_dgebrd
612  IF( wntvs .OR. wntvo ) THEN
613 * Compute space needed for DORGBR P
614  CALL dorgbr( 'P', m, n, m, a, n, dum(1),
615  $ dum(1), -1, ierr )
616  lwork_dorgbr_p=dum(1)
617  maxwrk = max( maxwrk, 3*m+lwork_dorgbr_p )
618  END IF
619  IF( wntva ) THEN
620  CALL dorgbr( 'P', n, n, m, a, n, dum(1),
621  $ dum(1), -1, ierr )
622  lwork_dorgbr_p=dum(1)
623  maxwrk = max( maxwrk, 3*m+lwork_dorgbr_p )
624  END IF
625  IF( .NOT.wntun ) THEN
626  maxwrk = max( maxwrk, 3*m+lwork_dorgbr_q )
627  END IF
628  maxwrk = max( maxwrk, bdspac )
629  minwrk = max( 3*m+n, bdspac )
630  END IF
631  END IF
632  maxwrk = max( maxwrk, minwrk )
633  work( 1 ) = maxwrk
634 *
635  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
636  info = -13
637  END IF
638  END IF
639 *
640  IF( info.NE.0 ) THEN
641  CALL xerbla( 'DGESVD', -info )
642  return
643  ELSE IF( lquery ) THEN
644  return
645  END IF
646 *
647 * Quick return if possible
648 *
649  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
650  return
651  END IF
652 *
653 * Get machine constants
654 *
655  eps = dlamch( 'P' )
656  smlnum = sqrt( dlamch( 'S' ) ) / eps
657  bignum = one / smlnum
658 *
659 * Scale A if max element outside range [SMLNUM,BIGNUM]
660 *
661  anrm = dlange( 'M', m, n, a, lda, dum )
662  iscl = 0
663  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
664  iscl = 1
665  CALL dlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
666  ELSE IF( anrm.GT.bignum ) THEN
667  iscl = 1
668  CALL dlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
669  END IF
670 *
671  IF( m.GE.n ) THEN
672 *
673 * A has at least as many rows as columns. If A has sufficiently
674 * more rows than columns, first reduce using the QR
675 * decomposition (if sufficient workspace available)
676 *
677  IF( m.GE.mnthr ) THEN
678 *
679  IF( wntun ) THEN
680 *
681 * Path 1 (M much larger than N, JOBU='N')
682 * No left singular vectors to be computed
683 *
684  itau = 1
685  iwork = itau + n
686 *
687 * Compute A=Q*R
688 * (Workspace: need 2*N, prefer N+N*NB)
689 *
690  CALL dgeqrf( m, n, a, lda, work( itau ), work( iwork ),
691  $ lwork-iwork+1, ierr )
692 *
693 * Zero out below R
694 *
695  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ), lda )
696  ie = 1
697  itauq = ie + n
698  itaup = itauq + n
699  iwork = itaup + n
700 *
701 * Bidiagonalize R in A
702 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
703 *
704  CALL dgebrd( n, n, a, lda, s, work( ie ), work( itauq ),
705  $ work( itaup ), work( iwork ), lwork-iwork+1,
706  $ ierr )
707  ncvt = 0
708  IF( wntvo .OR. wntvas ) THEN
709 *
710 * If right singular vectors desired, generate P'.
711 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
712 *
713  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
714  $ work( iwork ), lwork-iwork+1, ierr )
715  ncvt = n
716  END IF
717  iwork = ie + n
718 *
719 * Perform bidiagonal QR iteration, computing right
720 * singular vectors of A in A if desired
721 * (Workspace: need BDSPAC)
722 *
723  CALL dbdsqr( 'U', n, ncvt, 0, 0, s, work( ie ), a, lda,
724  $ dum, 1, dum, 1, work( iwork ), info )
725 *
726 * If right singular vectors desired in VT, copy them there
727 *
728  IF( wntvas )
729  $ CALL dlacpy( 'F', n, n, a, lda, vt, ldvt )
730 *
731  ELSE IF( wntuo .AND. wntvn ) THEN
732 *
733 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
734 * N left singular vectors to be overwritten on A and
735 * no right singular vectors to be computed
736 *
737  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
738 *
739 * Sufficient workspace for a fast algorithm
740 *
741  ir = 1
742  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
743 *
744 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
745 *
746  ldwrku = lda
747  ldwrkr = lda
748  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
749 *
750 * WORK(IU) is LDA by N, WORK(IR) is N by N
751 *
752  ldwrku = lda
753  ldwrkr = n
754  ELSE
755 *
756 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
757 *
758  ldwrku = ( lwork-n*n-n ) / n
759  ldwrkr = n
760  END IF
761  itau = ir + ldwrkr*n
762  iwork = itau + n
763 *
764 * Compute A=Q*R
765 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
766 *
767  CALL dgeqrf( m, n, a, lda, work( itau ),
768  $ work( iwork ), lwork-iwork+1, ierr )
769 *
770 * Copy R to WORK(IR) and zero out below it
771 *
772  CALL dlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
773  CALL dlaset( 'L', n-1, n-1, zero, zero, work( ir+1 ),
774  $ ldwrkr )
775 *
776 * Generate Q in A
777 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
778 *
779  CALL dorgqr( m, n, n, a, lda, work( itau ),
780  $ work( iwork ), lwork-iwork+1, ierr )
781  ie = itau
782  itauq = ie + n
783  itaup = itauq + n
784  iwork = itaup + n
785 *
786 * Bidiagonalize R in WORK(IR)
787 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
788 *
789  CALL dgebrd( n, n, work( ir ), ldwrkr, s, work( ie ),
790  $ work( itauq ), work( itaup ),
791  $ work( iwork ), lwork-iwork+1, ierr )
792 *
793 * Generate left vectors bidiagonalizing R
794 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
795 *
796  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
797  $ work( itauq ), work( iwork ),
798  $ lwork-iwork+1, ierr )
799  iwork = ie + n
800 *
801 * Perform bidiagonal QR iteration, computing left
802 * singular vectors of R in WORK(IR)
803 * (Workspace: need N*N+BDSPAC)
804 *
805  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum, 1,
806  $ work( ir ), ldwrkr, dum, 1,
807  $ work( iwork ), info )
808  iu = ie + n
809 *
810 * Multiply Q in A by left singular vectors of R in
811 * WORK(IR), storing result in WORK(IU) and copying to A
812 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
813 *
814  DO 10 i = 1, m, ldwrku
815  chunk = min( m-i+1, ldwrku )
816  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
817  $ lda, work( ir ), ldwrkr, zero,
818  $ work( iu ), ldwrku )
819  CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
820  $ a( i, 1 ), lda )
821  10 continue
822 *
823  ELSE
824 *
825 * Insufficient workspace for a fast algorithm
826 *
827  ie = 1
828  itauq = ie + n
829  itaup = itauq + n
830  iwork = itaup + n
831 *
832 * Bidiagonalize A
833 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
834 *
835  CALL dgebrd( m, n, a, lda, s, work( ie ),
836  $ work( itauq ), work( itaup ),
837  $ work( iwork ), lwork-iwork+1, ierr )
838 *
839 * Generate left vectors bidiagonalizing A
840 * (Workspace: need 4*N, prefer 3*N+N*NB)
841 *
842  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
843  $ work( iwork ), lwork-iwork+1, ierr )
844  iwork = ie + n
845 *
846 * Perform bidiagonal QR iteration, computing left
847 * singular vectors of A in A
848 * (Workspace: need BDSPAC)
849 *
850  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum, 1,
851  $ a, lda, dum, 1, work( iwork ), info )
852 *
853  END IF
854 *
855  ELSE IF( wntuo .AND. wntvas ) THEN
856 *
857 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858 * N left singular vectors to be overwritten on A and
859 * N right singular vectors to be computed in VT
860 *
861  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
862 *
863 * Sufficient workspace for a fast algorithm
864 *
865  ir = 1
866  IF( lwork.GE.max( wrkbl, lda*n+n )+lda*n ) THEN
867 *
868 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
869 *
870  ldwrku = lda
871  ldwrkr = lda
872  ELSE IF( lwork.GE.max( wrkbl, lda*n+n )+n*n ) THEN
873 *
874 * WORK(IU) is LDA by N and WORK(IR) is N by N
875 *
876  ldwrku = lda
877  ldwrkr = n
878  ELSE
879 *
880 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881 *
882  ldwrku = ( lwork-n*n-n ) / n
883  ldwrkr = n
884  END IF
885  itau = ir + ldwrkr*n
886  iwork = itau + n
887 *
888 * Compute A=Q*R
889 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
890 *
891  CALL dgeqrf( m, n, a, lda, work( itau ),
892  $ work( iwork ), lwork-iwork+1, ierr )
893 *
894 * Copy R to VT, zeroing out below it
895 *
896  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
897  IF( n.GT.1 )
898  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
899  $ vt( 2, 1 ), ldvt )
900 *
901 * Generate Q in A
902 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
903 *
904  CALL dorgqr( m, n, n, a, lda, work( itau ),
905  $ work( iwork ), lwork-iwork+1, ierr )
906  ie = itau
907  itauq = ie + n
908  itaup = itauq + n
909  iwork = itaup + n
910 *
911 * Bidiagonalize R in VT, copying result to WORK(IR)
912 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
913 *
914  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
915  $ work( itauq ), work( itaup ),
916  $ work( iwork ), lwork-iwork+1, ierr )
917  CALL dlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
918 *
919 * Generate left vectors bidiagonalizing R in WORK(IR)
920 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
921 *
922  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
923  $ work( itauq ), work( iwork ),
924  $ lwork-iwork+1, ierr )
925 *
926 * Generate right vectors bidiagonalizing R in VT
927 * (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
928 *
929  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
930  $ work( iwork ), lwork-iwork+1, ierr )
931  iwork = ie + n
932 *
933 * Perform bidiagonal QR iteration, computing left
934 * singular vectors of R in WORK(IR) and computing right
935 * singular vectors of R in VT
936 * (Workspace: need N*N+BDSPAC)
937 *
938  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt, ldvt,
939  $ work( ir ), ldwrkr, dum, 1,
940  $ work( iwork ), info )
941  iu = ie + n
942 *
943 * Multiply Q in A by left singular vectors of R in
944 * WORK(IR), storing result in WORK(IU) and copying to A
945 * (Workspace: need N*N+2*N, prefer N*N+M*N+N)
946 *
947  DO 20 i = 1, m, ldwrku
948  chunk = min( m-i+1, ldwrku )
949  CALL dgemm( 'N', 'N', chunk, n, n, one, a( i, 1 ),
950  $ lda, work( ir ), ldwrkr, zero,
951  $ work( iu ), ldwrku )
952  CALL dlacpy( 'F', chunk, n, work( iu ), ldwrku,
953  $ a( i, 1 ), lda )
954  20 continue
955 *
956  ELSE
957 *
958 * Insufficient workspace for a fast algorithm
959 *
960  itau = 1
961  iwork = itau + n
962 *
963 * Compute A=Q*R
964 * (Workspace: need 2*N, prefer N+N*NB)
965 *
966  CALL dgeqrf( m, n, a, lda, work( itau ),
967  $ work( iwork ), lwork-iwork+1, ierr )
968 *
969 * Copy R to VT, zeroing out below it
970 *
971  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
972  IF( n.GT.1 )
973  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
974  $ vt( 2, 1 ), ldvt )
975 *
976 * Generate Q in A
977 * (Workspace: need 2*N, prefer N+N*NB)
978 *
979  CALL dorgqr( m, n, n, a, lda, work( itau ),
980  $ work( iwork ), lwork-iwork+1, ierr )
981  ie = itau
982  itauq = ie + n
983  itaup = itauq + n
984  iwork = itaup + n
985 *
986 * Bidiagonalize R in VT
987 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
988 *
989  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
990  $ work( itauq ), work( itaup ),
991  $ work( iwork ), lwork-iwork+1, ierr )
992 *
993 * Multiply Q in A by left vectors bidiagonalizing R
994 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
995 *
996  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
997  $ work( itauq ), a, lda, work( iwork ),
998  $ lwork-iwork+1, ierr )
999 *
1000 * Generate right vectors bidiagonalizing R in VT
1001 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1002 *
1003  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1004  $ work( iwork ), lwork-iwork+1, ierr )
1005  iwork = ie + n
1006 *
1007 * Perform bidiagonal QR iteration, computing left
1008 * singular vectors of A in A and computing right
1009 * singular vectors of A in VT
1010 * (Workspace: need BDSPAC)
1011 *
1012  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt, ldvt,
1013  $ a, lda, dum, 1, work( iwork ), info )
1014 *
1015  END IF
1016 *
1017  ELSE IF( wntus ) THEN
1018 *
1019  IF( wntvn ) THEN
1020 *
1021 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1022 * N left singular vectors to be computed in U and
1023 * no right singular vectors to be computed
1024 *
1025  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1026 *
1027 * Sufficient workspace for a fast algorithm
1028 *
1029  ir = 1
1030  IF( lwork.GE.wrkbl+lda*n ) THEN
1031 *
1032 * WORK(IR) is LDA by N
1033 *
1034  ldwrkr = lda
1035  ELSE
1036 *
1037 * WORK(IR) is N by N
1038 *
1039  ldwrkr = n
1040  END IF
1041  itau = ir + ldwrkr*n
1042  iwork = itau + n
1043 *
1044 * Compute A=Q*R
1045 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1046 *
1047  CALL dgeqrf( m, n, a, lda, work( itau ),
1048  $ work( iwork ), lwork-iwork+1, ierr )
1049 *
1050 * Copy R to WORK(IR), zeroing out below it
1051 *
1052  CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1053  $ ldwrkr )
1054  CALL dlaset( 'L', n-1, n-1, zero, zero,
1055  $ work( ir+1 ), ldwrkr )
1056 *
1057 * Generate Q in A
1058 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1059 *
1060  CALL dorgqr( m, n, n, a, lda, work( itau ),
1061  $ work( iwork ), lwork-iwork+1, ierr )
1062  ie = itau
1063  itauq = ie + n
1064  itaup = itauq + n
1065  iwork = itaup + n
1066 *
1067 * Bidiagonalize R in WORK(IR)
1068 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1069 *
1070  CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1071  $ work( ie ), work( itauq ),
1072  $ work( itaup ), work( iwork ),
1073  $ lwork-iwork+1, ierr )
1074 *
1075 * Generate left vectors bidiagonalizing R in WORK(IR)
1076 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1077 *
1078  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1079  $ work( itauq ), work( iwork ),
1080  $ lwork-iwork+1, ierr )
1081  iwork = ie + n
1082 *
1083 * Perform bidiagonal QR iteration, computing left
1084 * singular vectors of R in WORK(IR)
1085 * (Workspace: need N*N+BDSPAC)
1086 *
1087  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1088  $ 1, work( ir ), ldwrkr, dum, 1,
1089  $ work( iwork ), info )
1090 *
1091 * Multiply Q in A by left singular vectors of R in
1092 * WORK(IR), storing result in U
1093 * (Workspace: need N*N)
1094 *
1095  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1096  $ work( ir ), ldwrkr, zero, u, ldu )
1097 *
1098  ELSE
1099 *
1100 * Insufficient workspace for a fast algorithm
1101 *
1102  itau = 1
1103  iwork = itau + n
1104 *
1105 * Compute A=Q*R, copying result to U
1106 * (Workspace: need 2*N, prefer N+N*NB)
1107 *
1108  CALL dgeqrf( m, n, a, lda, work( itau ),
1109  $ work( iwork ), lwork-iwork+1, ierr )
1110  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1111 *
1112 * Generate Q in U
1113 * (Workspace: need 2*N, prefer N+N*NB)
1114 *
1115  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1116  $ work( iwork ), lwork-iwork+1, ierr )
1117  ie = itau
1118  itauq = ie + n
1119  itaup = itauq + n
1120  iwork = itaup + n
1121 *
1122 * Zero out below R in A
1123 *
1124  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1125  $ lda )
1126 *
1127 * Bidiagonalize R in A
1128 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1129 *
1130  CALL dgebrd( n, n, a, lda, s, work( ie ),
1131  $ work( itauq ), work( itaup ),
1132  $ work( iwork ), lwork-iwork+1, ierr )
1133 *
1134 * Multiply Q in U by left vectors bidiagonalizing R
1135 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1136 *
1137  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1138  $ work( itauq ), u, ldu, work( iwork ),
1139  $ lwork-iwork+1, ierr )
1140  iwork = ie + n
1141 *
1142 * Perform bidiagonal QR iteration, computing left
1143 * singular vectors of A in U
1144 * (Workspace: need BDSPAC)
1145 *
1146  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1147  $ 1, u, ldu, dum, 1, work( iwork ),
1148  $ info )
1149 *
1150  END IF
1151 *
1152  ELSE IF( wntvo ) THEN
1153 *
1154 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1155 * N left singular vectors to be computed in U and
1156 * N right singular vectors to be overwritten on A
1157 *
1158  IF( lwork.GE.2*n*n+max( 4*n, bdspac ) ) THEN
1159 *
1160 * Sufficient workspace for a fast algorithm
1161 *
1162  iu = 1
1163  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1164 *
1165 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1166 *
1167  ldwrku = lda
1168  ir = iu + ldwrku*n
1169  ldwrkr = lda
1170  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1171 *
1172 * WORK(IU) is LDA by N and WORK(IR) is N by N
1173 *
1174  ldwrku = lda
1175  ir = iu + ldwrku*n
1176  ldwrkr = n
1177  ELSE
1178 *
1179 * WORK(IU) is N by N and WORK(IR) is N by N
1180 *
1181  ldwrku = n
1182  ir = iu + ldwrku*n
1183  ldwrkr = n
1184  END IF
1185  itau = ir + ldwrkr*n
1186  iwork = itau + n
1187 *
1188 * Compute A=Q*R
1189 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1190 *
1191  CALL dgeqrf( m, n, a, lda, work( itau ),
1192  $ work( iwork ), lwork-iwork+1, ierr )
1193 *
1194 * Copy R to WORK(IU), zeroing out below it
1195 *
1196  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1197  $ ldwrku )
1198  CALL dlaset( 'L', n-1, n-1, zero, zero,
1199  $ work( iu+1 ), ldwrku )
1200 *
1201 * Generate Q in A
1202 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1203 *
1204  CALL dorgqr( m, n, n, a, lda, work( itau ),
1205  $ work( iwork ), lwork-iwork+1, ierr )
1206  ie = itau
1207  itauq = ie + n
1208  itaup = itauq + n
1209  iwork = itaup + n
1210 *
1211 * Bidiagonalize R in WORK(IU), copying result to
1212 * WORK(IR)
1213 * (Workspace: need 2*N*N+4*N,
1214 * prefer 2*N*N+3*N+2*N*NB)
1215 *
1216  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1217  $ work( ie ), work( itauq ),
1218  $ work( itaup ), work( iwork ),
1219  $ lwork-iwork+1, ierr )
1220  CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1221  $ work( ir ), ldwrkr )
1222 *
1223 * Generate left bidiagonalizing vectors in WORK(IU)
1224 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1225 *
1226  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1227  $ work( itauq ), work( iwork ),
1228  $ lwork-iwork+1, ierr )
1229 *
1230 * Generate right bidiagonalizing vectors in WORK(IR)
1231 * (Workspace: need 2*N*N+4*N-1,
1232 * prefer 2*N*N+3*N+(N-1)*NB)
1233 *
1234  CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1235  $ work( itaup ), work( iwork ),
1236  $ lwork-iwork+1, ierr )
1237  iwork = ie + n
1238 *
1239 * Perform bidiagonal QR iteration, computing left
1240 * singular vectors of R in WORK(IU) and computing
1241 * right singular vectors of R in WORK(IR)
1242 * (Workspace: need 2*N*N+BDSPAC)
1243 *
1244  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1245  $ work( ir ), ldwrkr, work( iu ),
1246  $ ldwrku, dum, 1, work( iwork ), info )
1247 *
1248 * Multiply Q in A by left singular vectors of R in
1249 * WORK(IU), storing result in U
1250 * (Workspace: need N*N)
1251 *
1252  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1253  $ work( iu ), ldwrku, zero, u, ldu )
1254 *
1255 * Copy right singular vectors of R to A
1256 * (Workspace: need N*N)
1257 *
1258  CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1259  $ lda )
1260 *
1261  ELSE
1262 *
1263 * Insufficient workspace for a fast algorithm
1264 *
1265  itau = 1
1266  iwork = itau + n
1267 *
1268 * Compute A=Q*R, copying result to U
1269 * (Workspace: need 2*N, prefer N+N*NB)
1270 *
1271  CALL dgeqrf( m, n, a, lda, work( itau ),
1272  $ work( iwork ), lwork-iwork+1, ierr )
1273  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1274 *
1275 * Generate Q in U
1276 * (Workspace: need 2*N, prefer N+N*NB)
1277 *
1278  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1279  $ work( iwork ), lwork-iwork+1, ierr )
1280  ie = itau
1281  itauq = ie + n
1282  itaup = itauq + n
1283  iwork = itaup + n
1284 *
1285 * Zero out below R in A
1286 *
1287  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1288  $ lda )
1289 *
1290 * Bidiagonalize R in A
1291 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1292 *
1293  CALL dgebrd( n, n, a, lda, s, work( ie ),
1294  $ work( itauq ), work( itaup ),
1295  $ work( iwork ), lwork-iwork+1, ierr )
1296 *
1297 * Multiply Q in U by left vectors bidiagonalizing R
1298 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1299 *
1300  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1301  $ work( itauq ), u, ldu, work( iwork ),
1302  $ lwork-iwork+1, ierr )
1303 *
1304 * Generate right vectors bidiagonalizing R in A
1305 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1306 *
1307  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1308  $ work( iwork ), lwork-iwork+1, ierr )
1309  iwork = ie + n
1310 *
1311 * Perform bidiagonal QR iteration, computing left
1312 * singular vectors of A in U and computing right
1313 * singular vectors of A in A
1314 * (Workspace: need BDSPAC)
1315 *
1316  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1317  $ lda, u, ldu, dum, 1, work( iwork ),
1318  $ info )
1319 *
1320  END IF
1321 *
1322  ELSE IF( wntvas ) THEN
1323 *
1324 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1325 * or 'A')
1326 * N left singular vectors to be computed in U and
1327 * N right singular vectors to be computed in VT
1328 *
1329  IF( lwork.GE.n*n+max( 4*n, bdspac ) ) THEN
1330 *
1331 * Sufficient workspace for a fast algorithm
1332 *
1333  iu = 1
1334  IF( lwork.GE.wrkbl+lda*n ) THEN
1335 *
1336 * WORK(IU) is LDA by N
1337 *
1338  ldwrku = lda
1339  ELSE
1340 *
1341 * WORK(IU) is N by N
1342 *
1343  ldwrku = n
1344  END IF
1345  itau = iu + ldwrku*n
1346  iwork = itau + n
1347 *
1348 * Compute A=Q*R
1349 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1350 *
1351  CALL dgeqrf( m, n, a, lda, work( itau ),
1352  $ work( iwork ), lwork-iwork+1, ierr )
1353 *
1354 * Copy R to WORK(IU), zeroing out below it
1355 *
1356  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1357  $ ldwrku )
1358  CALL dlaset( 'L', n-1, n-1, zero, zero,
1359  $ work( iu+1 ), ldwrku )
1360 *
1361 * Generate Q in A
1362 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1363 *
1364  CALL dorgqr( m, n, n, a, lda, work( itau ),
1365  $ work( iwork ), lwork-iwork+1, ierr )
1366  ie = itau
1367  itauq = ie + n
1368  itaup = itauq + n
1369  iwork = itaup + n
1370 *
1371 * Bidiagonalize R in WORK(IU), copying result to VT
1372 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1373 *
1374  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1375  $ work( ie ), work( itauq ),
1376  $ work( itaup ), work( iwork ),
1377  $ lwork-iwork+1, ierr )
1378  CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1379  $ ldvt )
1380 *
1381 * Generate left bidiagonalizing vectors in WORK(IU)
1382 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1383 *
1384  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1385  $ work( itauq ), work( iwork ),
1386  $ lwork-iwork+1, ierr )
1387 *
1388 * Generate right bidiagonalizing vectors in VT
1389 * (Workspace: need N*N+4*N-1,
1390 * prefer N*N+3*N+(N-1)*NB)
1391 *
1392  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1393  $ work( iwork ), lwork-iwork+1, ierr )
1394  iwork = ie + n
1395 *
1396 * Perform bidiagonal QR iteration, computing left
1397 * singular vectors of R in WORK(IU) and computing
1398 * right singular vectors of R in VT
1399 * (Workspace: need N*N+BDSPAC)
1400 *
1401  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1402  $ ldvt, work( iu ), ldwrku, dum, 1,
1403  $ work( iwork ), info )
1404 *
1405 * Multiply Q in A by left singular vectors of R in
1406 * WORK(IU), storing result in U
1407 * (Workspace: need N*N)
1408 *
1409  CALL dgemm( 'N', 'N', m, n, n, one, a, lda,
1410  $ work( iu ), ldwrku, zero, u, ldu )
1411 *
1412  ELSE
1413 *
1414 * Insufficient workspace for a fast algorithm
1415 *
1416  itau = 1
1417  iwork = itau + n
1418 *
1419 * Compute A=Q*R, copying result to U
1420 * (Workspace: need 2*N, prefer N+N*NB)
1421 *
1422  CALL dgeqrf( m, n, a, lda, work( itau ),
1423  $ work( iwork ), lwork-iwork+1, ierr )
1424  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1425 *
1426 * Generate Q in U
1427 * (Workspace: need 2*N, prefer N+N*NB)
1428 *
1429  CALL dorgqr( m, n, n, u, ldu, work( itau ),
1430  $ work( iwork ), lwork-iwork+1, ierr )
1431 *
1432 * Copy R to VT, zeroing out below it
1433 *
1434  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1435  IF( n.GT.1 )
1436  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1437  $ vt( 2, 1 ), ldvt )
1438  ie = itau
1439  itauq = ie + n
1440  itaup = itauq + n
1441  iwork = itaup + n
1442 *
1443 * Bidiagonalize R in VT
1444 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1445 *
1446  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1447  $ work( itauq ), work( itaup ),
1448  $ work( iwork ), lwork-iwork+1, ierr )
1449 *
1450 * Multiply Q in U by left bidiagonalizing vectors
1451 * in VT
1452 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1453 *
1454  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1455  $ work( itauq ), u, ldu, work( iwork ),
1456  $ lwork-iwork+1, ierr )
1457 *
1458 * Generate right bidiagonalizing vectors in VT
1459 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1460 *
1461  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1462  $ work( iwork ), lwork-iwork+1, ierr )
1463  iwork = ie + n
1464 *
1465 * Perform bidiagonal QR iteration, computing left
1466 * singular vectors of A in U and computing right
1467 * singular vectors of A in VT
1468 * (Workspace: need BDSPAC)
1469 *
1470  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1471  $ ldvt, u, ldu, dum, 1, work( iwork ),
1472  $ info )
1473 *
1474  END IF
1475 *
1476  END IF
1477 *
1478  ELSE IF( wntua ) THEN
1479 *
1480  IF( wntvn ) THEN
1481 *
1482 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1483 * M left singular vectors to be computed in U and
1484 * no right singular vectors to be computed
1485 *
1486  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1487 *
1488 * Sufficient workspace for a fast algorithm
1489 *
1490  ir = 1
1491  IF( lwork.GE.wrkbl+lda*n ) THEN
1492 *
1493 * WORK(IR) is LDA by N
1494 *
1495  ldwrkr = lda
1496  ELSE
1497 *
1498 * WORK(IR) is N by N
1499 *
1500  ldwrkr = n
1501  END IF
1502  itau = ir + ldwrkr*n
1503  iwork = itau + n
1504 *
1505 * Compute A=Q*R, copying result to U
1506 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1507 *
1508  CALL dgeqrf( m, n, a, lda, work( itau ),
1509  $ work( iwork ), lwork-iwork+1, ierr )
1510  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1511 *
1512 * Copy R to WORK(IR), zeroing out below it
1513 *
1514  CALL dlacpy( 'U', n, n, a, lda, work( ir ),
1515  $ ldwrkr )
1516  CALL dlaset( 'L', n-1, n-1, zero, zero,
1517  $ work( ir+1 ), ldwrkr )
1518 *
1519 * Generate Q in U
1520 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1521 *
1522  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1523  $ work( iwork ), lwork-iwork+1, ierr )
1524  ie = itau
1525  itauq = ie + n
1526  itaup = itauq + n
1527  iwork = itaup + n
1528 *
1529 * Bidiagonalize R in WORK(IR)
1530 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1531 *
1532  CALL dgebrd( n, n, work( ir ), ldwrkr, s,
1533  $ work( ie ), work( itauq ),
1534  $ work( itaup ), work( iwork ),
1535  $ lwork-iwork+1, ierr )
1536 *
1537 * Generate left bidiagonalizing vectors in WORK(IR)
1538 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1539 *
1540  CALL dorgbr( 'Q', n, n, n, work( ir ), ldwrkr,
1541  $ work( itauq ), work( iwork ),
1542  $ lwork-iwork+1, ierr )
1543  iwork = ie + n
1544 *
1545 * Perform bidiagonal QR iteration, computing left
1546 * singular vectors of R in WORK(IR)
1547 * (Workspace: need N*N+BDSPAC)
1548 *
1549  CALL dbdsqr( 'U', n, 0, n, 0, s, work( ie ), dum,
1550  $ 1, work( ir ), ldwrkr, dum, 1,
1551  $ work( iwork ), info )
1552 *
1553 * Multiply Q in U by left singular vectors of R in
1554 * WORK(IR), storing result in A
1555 * (Workspace: need N*N)
1556 *
1557  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1558  $ work( ir ), ldwrkr, zero, a, lda )
1559 *
1560 * Copy left singular vectors of A from A to U
1561 *
1562  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1563 *
1564  ELSE
1565 *
1566 * Insufficient workspace for a fast algorithm
1567 *
1568  itau = 1
1569  iwork = itau + n
1570 *
1571 * Compute A=Q*R, copying result to U
1572 * (Workspace: need 2*N, prefer N+N*NB)
1573 *
1574  CALL dgeqrf( m, n, a, lda, work( itau ),
1575  $ work( iwork ), lwork-iwork+1, ierr )
1576  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1577 *
1578 * Generate Q in U
1579 * (Workspace: need N+M, prefer N+M*NB)
1580 *
1581  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1582  $ work( iwork ), lwork-iwork+1, ierr )
1583  ie = itau
1584  itauq = ie + n
1585  itaup = itauq + n
1586  iwork = itaup + n
1587 *
1588 * Zero out below R in A
1589 *
1590  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1591  $ lda )
1592 *
1593 * Bidiagonalize R in A
1594 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1595 *
1596  CALL dgebrd( n, n, a, lda, s, work( ie ),
1597  $ work( itauq ), work( itaup ),
1598  $ work( iwork ), lwork-iwork+1, ierr )
1599 *
1600 * Multiply Q in U by left bidiagonalizing vectors
1601 * in A
1602 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1603 *
1604  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1605  $ work( itauq ), u, ldu, work( iwork ),
1606  $ lwork-iwork+1, ierr )
1607  iwork = ie + n
1608 *
1609 * Perform bidiagonal QR iteration, computing left
1610 * singular vectors of A in U
1611 * (Workspace: need BDSPAC)
1612 *
1613  CALL dbdsqr( 'U', n, 0, m, 0, s, work( ie ), dum,
1614  $ 1, u, ldu, dum, 1, work( iwork ),
1615  $ info )
1616 *
1617  END IF
1618 *
1619  ELSE IF( wntvo ) THEN
1620 *
1621 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1622 * M left singular vectors to be computed in U and
1623 * N right singular vectors to be overwritten on A
1624 *
1625  IF( lwork.GE.2*n*n+max( n+m, 4*n, bdspac ) ) THEN
1626 *
1627 * Sufficient workspace for a fast algorithm
1628 *
1629  iu = 1
1630  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1631 *
1632 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1633 *
1634  ldwrku = lda
1635  ir = iu + ldwrku*n
1636  ldwrkr = lda
1637  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1638 *
1639 * WORK(IU) is LDA by N and WORK(IR) is N by N
1640 *
1641  ldwrku = lda
1642  ir = iu + ldwrku*n
1643  ldwrkr = n
1644  ELSE
1645 *
1646 * WORK(IU) is N by N and WORK(IR) is N by N
1647 *
1648  ldwrku = n
1649  ir = iu + ldwrku*n
1650  ldwrkr = n
1651  END IF
1652  itau = ir + ldwrkr*n
1653  iwork = itau + n
1654 *
1655 * Compute A=Q*R, copying result to U
1656 * (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1657 *
1658  CALL dgeqrf( m, n, a, lda, work( itau ),
1659  $ work( iwork ), lwork-iwork+1, ierr )
1660  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1661 *
1662 * Generate Q in U
1663 * (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1664 *
1665  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1666  $ work( iwork ), lwork-iwork+1, ierr )
1667 *
1668 * Copy R to WORK(IU), zeroing out below it
1669 *
1670  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1671  $ ldwrku )
1672  CALL dlaset( 'L', n-1, n-1, zero, zero,
1673  $ work( iu+1 ), ldwrku )
1674  ie = itau
1675  itauq = ie + n
1676  itaup = itauq + n
1677  iwork = itaup + n
1678 *
1679 * Bidiagonalize R in WORK(IU), copying result to
1680 * WORK(IR)
1681 * (Workspace: need 2*N*N+4*N,
1682 * prefer 2*N*N+3*N+2*N*NB)
1683 *
1684  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1685  $ work( ie ), work( itauq ),
1686  $ work( itaup ), work( iwork ),
1687  $ lwork-iwork+1, ierr )
1688  CALL dlacpy( 'U', n, n, work( iu ), ldwrku,
1689  $ work( ir ), ldwrkr )
1690 *
1691 * Generate left bidiagonalizing vectors in WORK(IU)
1692 * (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1693 *
1694  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1695  $ work( itauq ), work( iwork ),
1696  $ lwork-iwork+1, ierr )
1697 *
1698 * Generate right bidiagonalizing vectors in WORK(IR)
1699 * (Workspace: need 2*N*N+4*N-1,
1700 * prefer 2*N*N+3*N+(N-1)*NB)
1701 *
1702  CALL dorgbr( 'P', n, n, n, work( ir ), ldwrkr,
1703  $ work( itaup ), work( iwork ),
1704  $ lwork-iwork+1, ierr )
1705  iwork = ie + n
1706 *
1707 * Perform bidiagonal QR iteration, computing left
1708 * singular vectors of R in WORK(IU) and computing
1709 * right singular vectors of R in WORK(IR)
1710 * (Workspace: need 2*N*N+BDSPAC)
1711 *
1712  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ),
1713  $ work( ir ), ldwrkr, work( iu ),
1714  $ ldwrku, dum, 1, work( iwork ), info )
1715 *
1716 * Multiply Q in U by left singular vectors of R in
1717 * WORK(IU), storing result in A
1718 * (Workspace: need N*N)
1719 *
1720  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1721  $ work( iu ), ldwrku, zero, a, lda )
1722 *
1723 * Copy left singular vectors of A from A to U
1724 *
1725  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1726 *
1727 * Copy right singular vectors of R from WORK(IR) to A
1728 *
1729  CALL dlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1730  $ lda )
1731 *
1732  ELSE
1733 *
1734 * Insufficient workspace for a fast algorithm
1735 *
1736  itau = 1
1737  iwork = itau + n
1738 *
1739 * Compute A=Q*R, copying result to U
1740 * (Workspace: need 2*N, prefer N+N*NB)
1741 *
1742  CALL dgeqrf( m, n, a, lda, work( itau ),
1743  $ work( iwork ), lwork-iwork+1, ierr )
1744  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1745 *
1746 * Generate Q in U
1747 * (Workspace: need N+M, prefer N+M*NB)
1748 *
1749  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1750  $ work( iwork ), lwork-iwork+1, ierr )
1751  ie = itau
1752  itauq = ie + n
1753  itaup = itauq + n
1754  iwork = itaup + n
1755 *
1756 * Zero out below R in A
1757 *
1758  CALL dlaset( 'L', n-1, n-1, zero, zero, a( 2, 1 ),
1759  $ lda )
1760 *
1761 * Bidiagonalize R in A
1762 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1763 *
1764  CALL dgebrd( n, n, a, lda, s, work( ie ),
1765  $ work( itauq ), work( itaup ),
1766  $ work( iwork ), lwork-iwork+1, ierr )
1767 *
1768 * Multiply Q in U by left bidiagonalizing vectors
1769 * in A
1770 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1771 *
1772  CALL dormbr( 'Q', 'R', 'N', m, n, n, a, lda,
1773  $ work( itauq ), u, ldu, work( iwork ),
1774  $ lwork-iwork+1, ierr )
1775 *
1776 * Generate right bidiagonalizing vectors in A
1777 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1778 *
1779  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
1780  $ work( iwork ), lwork-iwork+1, ierr )
1781  iwork = ie + n
1782 *
1783 * Perform bidiagonal QR iteration, computing left
1784 * singular vectors of A in U and computing right
1785 * singular vectors of A in A
1786 * (Workspace: need BDSPAC)
1787 *
1788  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), a,
1789  $ lda, u, ldu, dum, 1, work( iwork ),
1790  $ info )
1791 *
1792  END IF
1793 *
1794  ELSE IF( wntvas ) THEN
1795 *
1796 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1797 * or 'A')
1798 * M left singular vectors to be computed in U and
1799 * N right singular vectors to be computed in VT
1800 *
1801  IF( lwork.GE.n*n+max( n+m, 4*n, bdspac ) ) THEN
1802 *
1803 * Sufficient workspace for a fast algorithm
1804 *
1805  iu = 1
1806  IF( lwork.GE.wrkbl+lda*n ) THEN
1807 *
1808 * WORK(IU) is LDA by N
1809 *
1810  ldwrku = lda
1811  ELSE
1812 *
1813 * WORK(IU) is N by N
1814 *
1815  ldwrku = n
1816  END IF
1817  itau = iu + ldwrku*n
1818  iwork = itau + n
1819 *
1820 * Compute A=Q*R, copying result to U
1821 * (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1822 *
1823  CALL dgeqrf( m, n, a, lda, work( itau ),
1824  $ work( iwork ), lwork-iwork+1, ierr )
1825  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1826 *
1827 * Generate Q in U
1828 * (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1829 *
1830  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1831  $ work( iwork ), lwork-iwork+1, ierr )
1832 *
1833 * Copy R to WORK(IU), zeroing out below it
1834 *
1835  CALL dlacpy( 'U', n, n, a, lda, work( iu ),
1836  $ ldwrku )
1837  CALL dlaset( 'L', n-1, n-1, zero, zero,
1838  $ work( iu+1 ), ldwrku )
1839  ie = itau
1840  itauq = ie + n
1841  itaup = itauq + n
1842  iwork = itaup + n
1843 *
1844 * Bidiagonalize R in WORK(IU), copying result to VT
1845 * (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1846 *
1847  CALL dgebrd( n, n, work( iu ), ldwrku, s,
1848  $ work( ie ), work( itauq ),
1849  $ work( itaup ), work( iwork ),
1850  $ lwork-iwork+1, ierr )
1851  CALL dlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1852  $ ldvt )
1853 *
1854 * Generate left bidiagonalizing vectors in WORK(IU)
1855 * (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1856 *
1857  CALL dorgbr( 'Q', n, n, n, work( iu ), ldwrku,
1858  $ work( itauq ), work( iwork ),
1859  $ lwork-iwork+1, ierr )
1860 *
1861 * Generate right bidiagonalizing vectors in VT
1862 * (Workspace: need N*N+4*N-1,
1863 * prefer N*N+3*N+(N-1)*NB)
1864 *
1865  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1866  $ work( iwork ), lwork-iwork+1, ierr )
1867  iwork = ie + n
1868 *
1869 * Perform bidiagonal QR iteration, computing left
1870 * singular vectors of R in WORK(IU) and computing
1871 * right singular vectors of R in VT
1872 * (Workspace: need N*N+BDSPAC)
1873 *
1874  CALL dbdsqr( 'U', n, n, n, 0, s, work( ie ), vt,
1875  $ ldvt, work( iu ), ldwrku, dum, 1,
1876  $ work( iwork ), info )
1877 *
1878 * Multiply Q in U by left singular vectors of R in
1879 * WORK(IU), storing result in A
1880 * (Workspace: need N*N)
1881 *
1882  CALL dgemm( 'N', 'N', m, n, n, one, u, ldu,
1883  $ work( iu ), ldwrku, zero, a, lda )
1884 *
1885 * Copy left singular vectors of A from A to U
1886 *
1887  CALL dlacpy( 'F', m, n, a, lda, u, ldu )
1888 *
1889  ELSE
1890 *
1891 * Insufficient workspace for a fast algorithm
1892 *
1893  itau = 1
1894  iwork = itau + n
1895 *
1896 * Compute A=Q*R, copying result to U
1897 * (Workspace: need 2*N, prefer N+N*NB)
1898 *
1899  CALL dgeqrf( m, n, a, lda, work( itau ),
1900  $ work( iwork ), lwork-iwork+1, ierr )
1901  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1902 *
1903 * Generate Q in U
1904 * (Workspace: need N+M, prefer N+M*NB)
1905 *
1906  CALL dorgqr( m, m, n, u, ldu, work( itau ),
1907  $ work( iwork ), lwork-iwork+1, ierr )
1908 *
1909 * Copy R from A to VT, zeroing out below it
1910 *
1911  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1912  IF( n.GT.1 )
1913  $ CALL dlaset( 'L', n-1, n-1, zero, zero,
1914  $ vt( 2, 1 ), ldvt )
1915  ie = itau
1916  itauq = ie + n
1917  itaup = itauq + n
1918  iwork = itaup + n
1919 *
1920 * Bidiagonalize R in VT
1921 * (Workspace: need 4*N, prefer 3*N+2*N*NB)
1922 *
1923  CALL dgebrd( n, n, vt, ldvt, s, work( ie ),
1924  $ work( itauq ), work( itaup ),
1925  $ work( iwork ), lwork-iwork+1, ierr )
1926 *
1927 * Multiply Q in U by left bidiagonalizing vectors
1928 * in VT
1929 * (Workspace: need 3*N+M, prefer 3*N+M*NB)
1930 *
1931  CALL dormbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1932  $ work( itauq ), u, ldu, work( iwork ),
1933  $ lwork-iwork+1, ierr )
1934 *
1935 * Generate right bidiagonalizing vectors in VT
1936 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1937 *
1938  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1939  $ work( iwork ), lwork-iwork+1, ierr )
1940  iwork = ie + n
1941 *
1942 * Perform bidiagonal QR iteration, computing left
1943 * singular vectors of A in U and computing right
1944 * singular vectors of A in VT
1945 * (Workspace: need BDSPAC)
1946 *
1947  CALL dbdsqr( 'U', n, n, m, 0, s, work( ie ), vt,
1948  $ ldvt, u, ldu, dum, 1, work( iwork ),
1949  $ info )
1950 *
1951  END IF
1952 *
1953  END IF
1954 *
1955  END IF
1956 *
1957  ELSE
1958 *
1959 * M .LT. MNTHR
1960 *
1961 * Path 10 (M at least N, but not much larger)
1962 * Reduce to bidiagonal form without QR decomposition
1963 *
1964  ie = 1
1965  itauq = ie + n
1966  itaup = itauq + n
1967  iwork = itaup + n
1968 *
1969 * Bidiagonalize A
1970 * (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1971 *
1972  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
1973  $ work( itaup ), work( iwork ), lwork-iwork+1,
1974  $ ierr )
1975  IF( wntuas ) THEN
1976 *
1977 * If left singular vectors desired in U, copy result to U
1978 * and generate left bidiagonalizing vectors in U
1979 * (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1980 *
1981  CALL dlacpy( 'L', m, n, a, lda, u, ldu )
1982  IF( wntus )
1983  $ ncu = n
1984  IF( wntua )
1985  $ ncu = m
1986  CALL dorgbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
1987  $ work( iwork ), lwork-iwork+1, ierr )
1988  END IF
1989  IF( wntvas ) THEN
1990 *
1991 * If right singular vectors desired in VT, copy result to
1992 * VT and generate right bidiagonalizing vectors in VT
1993 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1994 *
1995  CALL dlacpy( 'U', n, n, a, lda, vt, ldvt )
1996  CALL dorgbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1997  $ work( iwork ), lwork-iwork+1, ierr )
1998  END IF
1999  IF( wntuo ) THEN
2000 *
2001 * If left singular vectors desired in A, generate left
2002 * bidiagonalizing vectors in A
2003 * (Workspace: need 4*N, prefer 3*N+N*NB)
2004 *
2005  CALL dorgbr( 'Q', m, n, n, a, lda, work( itauq ),
2006  $ work( iwork ), lwork-iwork+1, ierr )
2007  END IF
2008  IF( wntvo ) THEN
2009 *
2010 * If right singular vectors desired in A, generate right
2011 * bidiagonalizing vectors in A
2012 * (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
2013 *
2014  CALL dorgbr( 'P', n, n, n, a, lda, work( itaup ),
2015  $ work( iwork ), lwork-iwork+1, ierr )
2016  END IF
2017  iwork = ie + n
2018  IF( wntuas .OR. wntuo )
2019  $ nru = m
2020  IF( wntun )
2021  $ nru = 0
2022  IF( wntvas .OR. wntvo )
2023  $ ncvt = n
2024  IF( wntvn )
2025  $ ncvt = 0
2026  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2027 *
2028 * Perform bidiagonal QR iteration, if desired, computing
2029 * left singular vectors in U and computing right singular
2030 * vectors in VT
2031 * (Workspace: need BDSPAC)
2032 *
2033  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2034  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
2035  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2036 *
2037 * Perform bidiagonal QR iteration, if desired, computing
2038 * left singular vectors in U and computing right singular
2039 * vectors in A
2040 * (Workspace: need BDSPAC)
2041 *
2042  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), a, lda,
2043  $ u, ldu, dum, 1, work( iwork ), info )
2044  ELSE
2045 *
2046 * Perform bidiagonal QR iteration, if desired, computing
2047 * left singular vectors in A and computing right singular
2048 * vectors in VT
2049 * (Workspace: need BDSPAC)
2050 *
2051  CALL dbdsqr( 'U', n, ncvt, nru, 0, s, work( ie ), vt,
2052  $ ldvt, a, lda, dum, 1, work( iwork ), info )
2053  END IF
2054 *
2055  END IF
2056 *
2057  ELSE
2058 *
2059 * A has more columns than rows. If A has sufficiently more
2060 * columns than rows, first reduce using the LQ decomposition (if
2061 * sufficient workspace available)
2062 *
2063  IF( n.GE.mnthr ) THEN
2064 *
2065  IF( wntvn ) THEN
2066 *
2067 * Path 1t(N much larger than M, JOBVT='N')
2068 * No right singular vectors to be computed
2069 *
2070  itau = 1
2071  iwork = itau + m
2072 *
2073 * Compute A=L*Q
2074 * (Workspace: need 2*M, prefer M+M*NB)
2075 *
2076  CALL dgelqf( m, n, a, lda, work( itau ), work( iwork ),
2077  $ lwork-iwork+1, ierr )
2078 *
2079 * Zero out above L
2080 *
2081  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ), lda )
2082  ie = 1
2083  itauq = ie + m
2084  itaup = itauq + m
2085  iwork = itaup + m
2086 *
2087 * Bidiagonalize L in A
2088 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2089 *
2090  CALL dgebrd( m, m, a, lda, s, work( ie ), work( itauq ),
2091  $ work( itaup ), work( iwork ), lwork-iwork+1,
2092  $ ierr )
2093  IF( wntuo .OR. wntuas ) THEN
2094 *
2095 * If left singular vectors desired, generate Q
2096 * (Workspace: need 4*M, prefer 3*M+M*NB)
2097 *
2098  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2099  $ work( iwork ), lwork-iwork+1, ierr )
2100  END IF
2101  iwork = ie + m
2102  nru = 0
2103  IF( wntuo .OR. wntuas )
2104  $ nru = m
2105 *
2106 * Perform bidiagonal QR iteration, computing left singular
2107 * vectors of A in A if desired
2108 * (Workspace: need BDSPAC)
2109 *
2110  CALL dbdsqr( 'U', m, 0, nru, 0, s, work( ie ), dum, 1, a,
2111  $ lda, dum, 1, work( iwork ), info )
2112 *
2113 * If left singular vectors desired in U, copy them there
2114 *
2115  IF( wntuas )
2116  $ CALL dlacpy( 'F', m, m, a, lda, u, ldu )
2117 *
2118  ELSE IF( wntvo .AND. wntun ) THEN
2119 *
2120 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2121 * M right singular vectors to be overwritten on A and
2122 * no left singular vectors to be computed
2123 *
2124  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2125 *
2126 * Sufficient workspace for a fast algorithm
2127 *
2128  ir = 1
2129  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2130 *
2131 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2132 *
2133  ldwrku = lda
2134  chunk = n
2135  ldwrkr = lda
2136  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2137 *
2138 * WORK(IU) is LDA by N and WORK(IR) is M by M
2139 *
2140  ldwrku = lda
2141  chunk = n
2142  ldwrkr = m
2143  ELSE
2144 *
2145 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2146 *
2147  ldwrku = m
2148  chunk = ( lwork-m*m-m ) / m
2149  ldwrkr = m
2150  END IF
2151  itau = ir + ldwrkr*m
2152  iwork = itau + m
2153 *
2154 * Compute A=L*Q
2155 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2156 *
2157  CALL dgelqf( m, n, a, lda, work( itau ),
2158  $ work( iwork ), lwork-iwork+1, ierr )
2159 *
2160 * Copy L to WORK(IR) and zero out above it
2161 *
2162  CALL dlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2163  CALL dlaset( 'U', m-1, m-1, zero, zero,
2164  $ work( ir+ldwrkr ), ldwrkr )
2165 *
2166 * Generate Q in A
2167 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2168 *
2169  CALL dorglq( m, n, m, a, lda, work( itau ),
2170  $ work( iwork ), lwork-iwork+1, ierr )
2171  ie = itau
2172  itauq = ie + m
2173  itaup = itauq + m
2174  iwork = itaup + m
2175 *
2176 * Bidiagonalize L in WORK(IR)
2177 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2178 *
2179  CALL dgebrd( m, m, work( ir ), ldwrkr, s, work( ie ),
2180  $ work( itauq ), work( itaup ),
2181  $ work( iwork ), lwork-iwork+1, ierr )
2182 *
2183 * Generate right vectors bidiagonalizing L
2184 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2185 *
2186  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2187  $ work( itaup ), work( iwork ),
2188  $ lwork-iwork+1, ierr )
2189  iwork = ie + m
2190 *
2191 * Perform bidiagonal QR iteration, computing right
2192 * singular vectors of L in WORK(IR)
2193 * (Workspace: need M*M+BDSPAC)
2194 *
2195  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2196  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2197  $ work( iwork ), info )
2198  iu = ie + m
2199 *
2200 * Multiply right singular vectors of L in WORK(IR) by Q
2201 * in A, storing result in WORK(IU) and copying to A
2202 * (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2203 *
2204  DO 30 i = 1, n, chunk
2205  blk = min( n-i+1, chunk )
2206  CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2207  $ ldwrkr, a( 1, i ), lda, zero,
2208  $ work( iu ), ldwrku )
2209  CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2210  $ a( 1, i ), lda )
2211  30 continue
2212 *
2213  ELSE
2214 *
2215 * Insufficient workspace for a fast algorithm
2216 *
2217  ie = 1
2218  itauq = ie + m
2219  itaup = itauq + m
2220  iwork = itaup + m
2221 *
2222 * Bidiagonalize A
2223 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2224 *
2225  CALL dgebrd( m, n, a, lda, s, work( ie ),
2226  $ work( itauq ), work( itaup ),
2227  $ work( iwork ), lwork-iwork+1, ierr )
2228 *
2229 * Generate right vectors bidiagonalizing A
2230 * (Workspace: need 4*M, prefer 3*M+M*NB)
2231 *
2232  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
2233  $ work( iwork ), lwork-iwork+1, ierr )
2234  iwork = ie + m
2235 *
2236 * Perform bidiagonal QR iteration, computing right
2237 * singular vectors of A in A
2238 * (Workspace: need BDSPAC)
2239 *
2240  CALL dbdsqr( 'L', m, n, 0, 0, s, work( ie ), a, lda,
2241  $ dum, 1, dum, 1, work( iwork ), info )
2242 *
2243  END IF
2244 *
2245  ELSE IF( wntvo .AND. wntuas ) THEN
2246 *
2247 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2248 * M right singular vectors to be overwritten on A and
2249 * M left singular vectors to be computed in U
2250 *
2251  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2252 *
2253 * Sufficient workspace for a fast algorithm
2254 *
2255  ir = 1
2256  IF( lwork.GE.max( wrkbl, lda*n+m )+lda*m ) THEN
2257 *
2258 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2259 *
2260  ldwrku = lda
2261  chunk = n
2262  ldwrkr = lda
2263  ELSE IF( lwork.GE.max( wrkbl, lda*n+m )+m*m ) THEN
2264 *
2265 * WORK(IU) is LDA by N and WORK(IR) is M by M
2266 *
2267  ldwrku = lda
2268  chunk = n
2269  ldwrkr = m
2270  ELSE
2271 *
2272 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2273 *
2274  ldwrku = m
2275  chunk = ( lwork-m*m-m ) / m
2276  ldwrkr = m
2277  END IF
2278  itau = ir + ldwrkr*m
2279  iwork = itau + m
2280 *
2281 * Compute A=L*Q
2282 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2283 *
2284  CALL dgelqf( m, n, a, lda, work( itau ),
2285  $ work( iwork ), lwork-iwork+1, ierr )
2286 *
2287 * Copy L to U, zeroing about above it
2288 *
2289  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2290  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2291  $ ldu )
2292 *
2293 * Generate Q in A
2294 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2295 *
2296  CALL dorglq( m, n, m, a, lda, work( itau ),
2297  $ work( iwork ), lwork-iwork+1, ierr )
2298  ie = itau
2299  itauq = ie + m
2300  itaup = itauq + m
2301  iwork = itaup + m
2302 *
2303 * Bidiagonalize L in U, copying result to WORK(IR)
2304 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2305 *
2306  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2307  $ work( itauq ), work( itaup ),
2308  $ work( iwork ), lwork-iwork+1, ierr )
2309  CALL dlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2310 *
2311 * Generate right vectors bidiagonalizing L in WORK(IR)
2312 * (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2313 *
2314  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2315  $ work( itaup ), work( iwork ),
2316  $ lwork-iwork+1, ierr )
2317 *
2318 * Generate left vectors bidiagonalizing L in U
2319 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2320 *
2321  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2322  $ work( iwork ), lwork-iwork+1, ierr )
2323  iwork = ie + m
2324 *
2325 * Perform bidiagonal QR iteration, computing left
2326 * singular vectors of L in U, and computing right
2327 * singular vectors of L in WORK(IR)
2328 * (Workspace: need M*M+BDSPAC)
2329 *
2330  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2331  $ work( ir ), ldwrkr, u, ldu, dum, 1,
2332  $ work( iwork ), info )
2333  iu = ie + m
2334 *
2335 * Multiply right singular vectors of L in WORK(IR) by Q
2336 * in A, storing result in WORK(IU) and copying to A
2337 * (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2338 *
2339  DO 40 i = 1, n, chunk
2340  blk = min( n-i+1, chunk )
2341  CALL dgemm( 'N', 'N', m, blk, m, one, work( ir ),
2342  $ ldwrkr, a( 1, i ), lda, zero,
2343  $ work( iu ), ldwrku )
2344  CALL dlacpy( 'F', m, blk, work( iu ), ldwrku,
2345  $ a( 1, i ), lda )
2346  40 continue
2347 *
2348  ELSE
2349 *
2350 * Insufficient workspace for a fast algorithm
2351 *
2352  itau = 1
2353  iwork = itau + m
2354 *
2355 * Compute A=L*Q
2356 * (Workspace: need 2*M, prefer M+M*NB)
2357 *
2358  CALL dgelqf( m, n, a, lda, work( itau ),
2359  $ work( iwork ), lwork-iwork+1, ierr )
2360 *
2361 * Copy L to U, zeroing out above it
2362 *
2363  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2364  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2365  $ ldu )
2366 *
2367 * Generate Q in A
2368 * (Workspace: need 2*M, prefer M+M*NB)
2369 *
2370  CALL dorglq( m, n, m, a, lda, work( itau ),
2371  $ work( iwork ), lwork-iwork+1, ierr )
2372  ie = itau
2373  itauq = ie + m
2374  itaup = itauq + m
2375  iwork = itaup + m
2376 *
2377 * Bidiagonalize L in U
2378 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2379 *
2380  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2381  $ work( itauq ), work( itaup ),
2382  $ work( iwork ), lwork-iwork+1, ierr )
2383 *
2384 * Multiply right vectors bidiagonalizing L by Q in A
2385 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2386 *
2387  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2388  $ work( itaup ), a, lda, work( iwork ),
2389  $ lwork-iwork+1, ierr )
2390 *
2391 * Generate left vectors bidiagonalizing L in U
2392 * (Workspace: need 4*M, prefer 3*M+M*NB)
2393 *
2394  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2395  $ work( iwork ), lwork-iwork+1, ierr )
2396  iwork = ie + m
2397 *
2398 * Perform bidiagonal QR iteration, computing left
2399 * singular vectors of A in U and computing right
2400 * singular vectors of A in A
2401 * (Workspace: need BDSPAC)
2402 *
2403  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), a, lda,
2404  $ u, ldu, dum, 1, work( iwork ), info )
2405 *
2406  END IF
2407 *
2408  ELSE IF( wntvs ) THEN
2409 *
2410  IF( wntun ) THEN
2411 *
2412 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2413 * M right singular vectors to be computed in VT and
2414 * no left singular vectors to be computed
2415 *
2416  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2417 *
2418 * Sufficient workspace for a fast algorithm
2419 *
2420  ir = 1
2421  IF( lwork.GE.wrkbl+lda*m ) THEN
2422 *
2423 * WORK(IR) is LDA by M
2424 *
2425  ldwrkr = lda
2426  ELSE
2427 *
2428 * WORK(IR) is M by M
2429 *
2430  ldwrkr = m
2431  END IF
2432  itau = ir + ldwrkr*m
2433  iwork = itau + m
2434 *
2435 * Compute A=L*Q
2436 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2437 *
2438  CALL dgelqf( m, n, a, lda, work( itau ),
2439  $ work( iwork ), lwork-iwork+1, ierr )
2440 *
2441 * Copy L to WORK(IR), zeroing out above it
2442 *
2443  CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2444  $ ldwrkr )
2445  CALL dlaset( 'U', m-1, m-1, zero, zero,
2446  $ work( ir+ldwrkr ), ldwrkr )
2447 *
2448 * Generate Q in A
2449 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2450 *
2451  CALL dorglq( m, n, m, a, lda, work( itau ),
2452  $ work( iwork ), lwork-iwork+1, ierr )
2453  ie = itau
2454  itauq = ie + m
2455  itaup = itauq + m
2456  iwork = itaup + m
2457 *
2458 * Bidiagonalize L in WORK(IR)
2459 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2460 *
2461  CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2462  $ work( ie ), work( itauq ),
2463  $ work( itaup ), work( iwork ),
2464  $ lwork-iwork+1, ierr )
2465 *
2466 * Generate right vectors bidiagonalizing L in
2467 * WORK(IR)
2468 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2469 *
2470  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2471  $ work( itaup ), work( iwork ),
2472  $ lwork-iwork+1, ierr )
2473  iwork = ie + m
2474 *
2475 * Perform bidiagonal QR iteration, computing right
2476 * singular vectors of L in WORK(IR)
2477 * (Workspace: need M*M+BDSPAC)
2478 *
2479  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2480  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2481  $ work( iwork ), info )
2482 *
2483 * Multiply right singular vectors of L in WORK(IR) by
2484 * Q in A, storing result in VT
2485 * (Workspace: need M*M)
2486 *
2487  CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2488  $ ldwrkr, a, lda, zero, vt, ldvt )
2489 *
2490  ELSE
2491 *
2492 * Insufficient workspace for a fast algorithm
2493 *
2494  itau = 1
2495  iwork = itau + m
2496 *
2497 * Compute A=L*Q
2498 * (Workspace: need 2*M, prefer M+M*NB)
2499 *
2500  CALL dgelqf( m, n, a, lda, work( itau ),
2501  $ work( iwork ), lwork-iwork+1, ierr )
2502 *
2503 * Copy result to VT
2504 *
2505  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2506 *
2507 * Generate Q in VT
2508 * (Workspace: need 2*M, prefer M+M*NB)
2509 *
2510  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2511  $ work( iwork ), lwork-iwork+1, ierr )
2512  ie = itau
2513  itauq = ie + m
2514  itaup = itauq + m
2515  iwork = itaup + m
2516 *
2517 * Zero out above L in A
2518 *
2519  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2520  $ lda )
2521 *
2522 * Bidiagonalize L in A
2523 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2524 *
2525  CALL dgebrd( m, m, a, lda, s, work( ie ),
2526  $ work( itauq ), work( itaup ),
2527  $ work( iwork ), lwork-iwork+1, ierr )
2528 *
2529 * Multiply right vectors bidiagonalizing L by Q in VT
2530 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2531 *
2532  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2533  $ work( itaup ), vt, ldvt,
2534  $ work( iwork ), lwork-iwork+1, ierr )
2535  iwork = ie + m
2536 *
2537 * Perform bidiagonal QR iteration, computing right
2538 * singular vectors of A in VT
2539 * (Workspace: need BDSPAC)
2540 *
2541  CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
2542  $ ldvt, dum, 1, dum, 1, work( iwork ),
2543  $ info )
2544 *
2545  END IF
2546 *
2547  ELSE IF( wntuo ) THEN
2548 *
2549 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2550 * M right singular vectors to be computed in VT and
2551 * M left singular vectors to be overwritten on A
2552 *
2553  IF( lwork.GE.2*m*m+max( 4*m, bdspac ) ) THEN
2554 *
2555 * Sufficient workspace for a fast algorithm
2556 *
2557  iu = 1
2558  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2559 *
2560 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2561 *
2562  ldwrku = lda
2563  ir = iu + ldwrku*m
2564  ldwrkr = lda
2565  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2566 *
2567 * WORK(IU) is LDA by M and WORK(IR) is M by M
2568 *
2569  ldwrku = lda
2570  ir = iu + ldwrku*m
2571  ldwrkr = m
2572  ELSE
2573 *
2574 * WORK(IU) is M by M and WORK(IR) is M by M
2575 *
2576  ldwrku = m
2577  ir = iu + ldwrku*m
2578  ldwrkr = m
2579  END IF
2580  itau = ir + ldwrkr*m
2581  iwork = itau + m
2582 *
2583 * Compute A=L*Q
2584 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2585 *
2586  CALL dgelqf( m, n, a, lda, work( itau ),
2587  $ work( iwork ), lwork-iwork+1, ierr )
2588 *
2589 * Copy L to WORK(IU), zeroing out below it
2590 *
2591  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2592  $ ldwrku )
2593  CALL dlaset( 'U', m-1, m-1, zero, zero,
2594  $ work( iu+ldwrku ), ldwrku )
2595 *
2596 * Generate Q in A
2597 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2598 *
2599  CALL dorglq( m, n, m, a, lda, work( itau ),
2600  $ work( iwork ), lwork-iwork+1, ierr )
2601  ie = itau
2602  itauq = ie + m
2603  itaup = itauq + m
2604  iwork = itaup + m
2605 *
2606 * Bidiagonalize L in WORK(IU), copying result to
2607 * WORK(IR)
2608 * (Workspace: need 2*M*M+4*M,
2609 * prefer 2*M*M+3*M+2*M*NB)
2610 *
2611  CALL dgebrd( m, m, work( iu ), ldwrku, s,
2612  $ work( ie ), work( itauq ),
2613  $ work( itaup ), work( iwork ),
2614  $ lwork-iwork+1, ierr )
2615  CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
2616  $ work( ir ), ldwrkr )
2617 *
2618 * Generate right bidiagonalizing vectors in WORK(IU)
2619 * (Workspace: need 2*M*M+4*M-1,
2620 * prefer 2*M*M+3*M+(M-1)*NB)
2621 *
2622  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2623  $ work( itaup ), work( iwork ),
2624  $ lwork-iwork+1, ierr )
2625 *
2626 * Generate left bidiagonalizing vectors in WORK(IR)
2627 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2628 *
2629  CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
2630  $ work( itauq ), work( iwork ),
2631  $ lwork-iwork+1, ierr )
2632  iwork = ie + m
2633 *
2634 * Perform bidiagonal QR iteration, computing left
2635 * singular vectors of L in WORK(IR) and computing
2636 * right singular vectors of L in WORK(IU)
2637 * (Workspace: need 2*M*M+BDSPAC)
2638 *
2639  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2640  $ work( iu ), ldwrku, work( ir ),
2641  $ ldwrkr, dum, 1, work( iwork ), info )
2642 *
2643 * Multiply right singular vectors of L in WORK(IU) by
2644 * Q in A, storing result in VT
2645 * (Workspace: need M*M)
2646 *
2647  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2648  $ ldwrku, a, lda, zero, vt, ldvt )
2649 *
2650 * Copy left singular vectors of L to A
2651 * (Workspace: need M*M)
2652 *
2653  CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2654  $ lda )
2655 *
2656  ELSE
2657 *
2658 * Insufficient workspace for a fast algorithm
2659 *
2660  itau = 1
2661  iwork = itau + m
2662 *
2663 * Compute A=L*Q, copying result to VT
2664 * (Workspace: need 2*M, prefer M+M*NB)
2665 *
2666  CALL dgelqf( m, n, a, lda, work( itau ),
2667  $ work( iwork ), lwork-iwork+1, ierr )
2668  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2669 *
2670 * Generate Q in VT
2671 * (Workspace: need 2*M, prefer M+M*NB)
2672 *
2673  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2674  $ work( iwork ), lwork-iwork+1, ierr )
2675  ie = itau
2676  itauq = ie + m
2677  itaup = itauq + m
2678  iwork = itaup + m
2679 *
2680 * Zero out above L in A
2681 *
2682  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2683  $ lda )
2684 *
2685 * Bidiagonalize L in A
2686 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2687 *
2688  CALL dgebrd( m, m, a, lda, s, work( ie ),
2689  $ work( itauq ), work( itaup ),
2690  $ work( iwork ), lwork-iwork+1, ierr )
2691 *
2692 * Multiply right vectors bidiagonalizing L by Q in VT
2693 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2694 *
2695  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
2696  $ work( itaup ), vt, ldvt,
2697  $ work( iwork ), lwork-iwork+1, ierr )
2698 *
2699 * Generate left bidiagonalizing vectors of L in A
2700 * (Workspace: need 4*M, prefer 3*M+M*NB)
2701 *
2702  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
2703  $ work( iwork ), lwork-iwork+1, ierr )
2704  iwork = ie + m
2705 *
2706 * Perform bidiagonal QR iteration, compute left
2707 * singular vectors of A in A and compute right
2708 * singular vectors of A in VT
2709 * (Workspace: need BDSPAC)
2710 *
2711  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2712  $ ldvt, a, lda, dum, 1, work( iwork ),
2713  $ info )
2714 *
2715  END IF
2716 *
2717  ELSE IF( wntuas ) THEN
2718 *
2719 * Path 6t(N much larger than M, JOBU='S' or 'A',
2720 * JOBVT='S')
2721 * M right singular vectors to be computed in VT and
2722 * M left singular vectors to be computed in U
2723 *
2724  IF( lwork.GE.m*m+max( 4*m, bdspac ) ) THEN
2725 *
2726 * Sufficient workspace for a fast algorithm
2727 *
2728  iu = 1
2729  IF( lwork.GE.wrkbl+lda*m ) THEN
2730 *
2731 * WORK(IU) is LDA by N
2732 *
2733  ldwrku = lda
2734  ELSE
2735 *
2736 * WORK(IU) is LDA by M
2737 *
2738  ldwrku = m
2739  END IF
2740  itau = iu + ldwrku*m
2741  iwork = itau + m
2742 *
2743 * Compute A=L*Q
2744 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2745 *
2746  CALL dgelqf( m, n, a, lda, work( itau ),
2747  $ work( iwork ), lwork-iwork+1, ierr )
2748 *
2749 * Copy L to WORK(IU), zeroing out above it
2750 *
2751  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
2752  $ ldwrku )
2753  CALL dlaset( 'U', m-1, m-1, zero, zero,
2754  $ work( iu+ldwrku ), ldwrku )
2755 *
2756 * Generate Q in A
2757 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2758 *
2759  CALL dorglq( m, n, m, a, lda, work( itau ),
2760  $ work( iwork ), lwork-iwork+1, ierr )
2761  ie = itau
2762  itauq = ie + m
2763  itaup = itauq + m
2764  iwork = itaup + m
2765 *
2766 * Bidiagonalize L in WORK(IU), copying result to U
2767 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2768 *
2769  CALL dgebrd( m, m, work( iu ), ldwrku, s,
2770  $ work( ie ), work( itauq ),
2771  $ work( itaup ), work( iwork ),
2772  $ lwork-iwork+1, ierr )
2773  CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
2774  $ ldu )
2775 *
2776 * Generate right bidiagonalizing vectors in WORK(IU)
2777 * (Workspace: need M*M+4*M-1,
2778 * prefer M*M+3*M+(M-1)*NB)
2779 *
2780  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
2781  $ work( itaup ), work( iwork ),
2782  $ lwork-iwork+1, ierr )
2783 *
2784 * Generate left bidiagonalizing vectors in U
2785 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2786 *
2787  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2788  $ work( iwork ), lwork-iwork+1, ierr )
2789  iwork = ie + m
2790 *
2791 * Perform bidiagonal QR iteration, computing left
2792 * singular vectors of L in U and computing right
2793 * singular vectors of L in WORK(IU)
2794 * (Workspace: need M*M+BDSPAC)
2795 *
2796  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
2797  $ work( iu ), ldwrku, u, ldu, dum, 1,
2798  $ work( iwork ), info )
2799 *
2800 * Multiply right singular vectors of L in WORK(IU) by
2801 * Q in A, storing result in VT
2802 * (Workspace: need M*M)
2803 *
2804  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
2805  $ ldwrku, a, lda, zero, vt, ldvt )
2806 *
2807  ELSE
2808 *
2809 * Insufficient workspace for a fast algorithm
2810 *
2811  itau = 1
2812  iwork = itau + m
2813 *
2814 * Compute A=L*Q, copying result to VT
2815 * (Workspace: need 2*M, prefer M+M*NB)
2816 *
2817  CALL dgelqf( m, n, a, lda, work( itau ),
2818  $ work( iwork ), lwork-iwork+1, ierr )
2819  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2820 *
2821 * Generate Q in VT
2822 * (Workspace: need 2*M, prefer M+M*NB)
2823 *
2824  CALL dorglq( m, n, m, vt, ldvt, work( itau ),
2825  $ work( iwork ), lwork-iwork+1, ierr )
2826 *
2827 * Copy L to U, zeroing out above it
2828 *
2829  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
2830  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
2831  $ ldu )
2832  ie = itau
2833  itauq = ie + m
2834  itaup = itauq + m
2835  iwork = itaup + m
2836 *
2837 * Bidiagonalize L in U
2838 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2839 *
2840  CALL dgebrd( m, m, u, ldu, s, work( ie ),
2841  $ work( itauq ), work( itaup ),
2842  $ work( iwork ), lwork-iwork+1, ierr )
2843 *
2844 * Multiply right bidiagonalizing vectors in U by Q
2845 * in VT
2846 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2847 *
2848  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
2849  $ work( itaup ), vt, ldvt,
2850  $ work( iwork ), lwork-iwork+1, ierr )
2851 *
2852 * Generate left bidiagonalizing vectors in U
2853 * (Workspace: need 4*M, prefer 3*M+M*NB)
2854 *
2855  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
2856  $ work( iwork ), lwork-iwork+1, ierr )
2857  iwork = ie + m
2858 *
2859 * Perform bidiagonal QR iteration, computing left
2860 * singular vectors of A in U and computing right
2861 * singular vectors of A in VT
2862 * (Workspace: need BDSPAC)
2863 *
2864  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
2865  $ ldvt, u, ldu, dum, 1, work( iwork ),
2866  $ info )
2867 *
2868  END IF
2869 *
2870  END IF
2871 *
2872  ELSE IF( wntva ) THEN
2873 *
2874  IF( wntun ) THEN
2875 *
2876 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2877 * N right singular vectors to be computed in VT and
2878 * no left singular vectors to be computed
2879 *
2880  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
2881 *
2882 * Sufficient workspace for a fast algorithm
2883 *
2884  ir = 1
2885  IF( lwork.GE.wrkbl+lda*m ) THEN
2886 *
2887 * WORK(IR) is LDA by M
2888 *
2889  ldwrkr = lda
2890  ELSE
2891 *
2892 * WORK(IR) is M by M
2893 *
2894  ldwrkr = m
2895  END IF
2896  itau = ir + ldwrkr*m
2897  iwork = itau + m
2898 *
2899 * Compute A=L*Q, copying result to VT
2900 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2901 *
2902  CALL dgelqf( m, n, a, lda, work( itau ),
2903  $ work( iwork ), lwork-iwork+1, ierr )
2904  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2905 *
2906 * Copy L to WORK(IR), zeroing out above it
2907 *
2908  CALL dlacpy( 'L', m, m, a, lda, work( ir ),
2909  $ ldwrkr )
2910  CALL dlaset( 'U', m-1, m-1, zero, zero,
2911  $ work( ir+ldwrkr ), ldwrkr )
2912 *
2913 * Generate Q in VT
2914 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2915 *
2916  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2917  $ work( iwork ), lwork-iwork+1, ierr )
2918  ie = itau
2919  itauq = ie + m
2920  itaup = itauq + m
2921  iwork = itaup + m
2922 *
2923 * Bidiagonalize L in WORK(IR)
2924 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2925 *
2926  CALL dgebrd( m, m, work( ir ), ldwrkr, s,
2927  $ work( ie ), work( itauq ),
2928  $ work( itaup ), work( iwork ),
2929  $ lwork-iwork+1, ierr )
2930 *
2931 * Generate right bidiagonalizing vectors in WORK(IR)
2932 * (Workspace: need M*M+4*M-1,
2933 * prefer M*M+3*M+(M-1)*NB)
2934 *
2935  CALL dorgbr( 'P', m, m, m, work( ir ), ldwrkr,
2936  $ work( itaup ), work( iwork ),
2937  $ lwork-iwork+1, ierr )
2938  iwork = ie + m
2939 *
2940 * Perform bidiagonal QR iteration, computing right
2941 * singular vectors of L in WORK(IR)
2942 * (Workspace: need M*M+BDSPAC)
2943 *
2944  CALL dbdsqr( 'U', m, m, 0, 0, s, work( ie ),
2945  $ work( ir ), ldwrkr, dum, 1, dum, 1,
2946  $ work( iwork ), info )
2947 *
2948 * Multiply right singular vectors of L in WORK(IR) by
2949 * Q in VT, storing result in A
2950 * (Workspace: need M*M)
2951 *
2952  CALL dgemm( 'N', 'N', m, n, m, one, work( ir ),
2953  $ ldwrkr, vt, ldvt, zero, a, lda )
2954 *
2955 * Copy right singular vectors of A from A to VT
2956 *
2957  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
2958 *
2959  ELSE
2960 *
2961 * Insufficient workspace for a fast algorithm
2962 *
2963  itau = 1
2964  iwork = itau + m
2965 *
2966 * Compute A=L*Q, copying result to VT
2967 * (Workspace: need 2*M, prefer M+M*NB)
2968 *
2969  CALL dgelqf( m, n, a, lda, work( itau ),
2970  $ work( iwork ), lwork-iwork+1, ierr )
2971  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
2972 *
2973 * Generate Q in VT
2974 * (Workspace: need M+N, prefer M+N*NB)
2975 *
2976  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
2977  $ work( iwork ), lwork-iwork+1, ierr )
2978  ie = itau
2979  itauq = ie + m
2980  itaup = itauq + m
2981  iwork = itaup + m
2982 *
2983 * Zero out above L in A
2984 *
2985  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
2986  $ lda )
2987 *
2988 * Bidiagonalize L in A
2989 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
2990 *
2991  CALL dgebrd( m, m, a, lda, s, work( ie ),
2992  $ work( itauq ), work( itaup ),
2993  $ work( iwork ), lwork-iwork+1, ierr )
2994 *
2995 * Multiply right bidiagonalizing vectors in A by Q
2996 * in VT
2997 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
2998 *
2999  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3000  $ work( itaup ), vt, ldvt,
3001  $ work( iwork ), lwork-iwork+1, ierr )
3002  iwork = ie + m
3003 *
3004 * Perform bidiagonal QR iteration, computing right
3005 * singular vectors of A in VT
3006 * (Workspace: need BDSPAC)
3007 *
3008  CALL dbdsqr( 'U', m, n, 0, 0, s, work( ie ), vt,
3009  $ ldvt, dum, 1, dum, 1, work( iwork ),
3010  $ info )
3011 *
3012  END IF
3013 *
3014  ELSE IF( wntuo ) THEN
3015 *
3016 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3017 * N right singular vectors to be computed in VT and
3018 * M left singular vectors to be overwritten on A
3019 *
3020  IF( lwork.GE.2*m*m+max( n+m, 4*m, bdspac ) ) THEN
3021 *
3022 * Sufficient workspace for a fast algorithm
3023 *
3024  iu = 1
3025  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3026 *
3027 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3028 *
3029  ldwrku = lda
3030  ir = iu + ldwrku*m
3031  ldwrkr = lda
3032  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3033 *
3034 * WORK(IU) is LDA by M and WORK(IR) is M by M
3035 *
3036  ldwrku = lda
3037  ir = iu + ldwrku*m
3038  ldwrkr = m
3039  ELSE
3040 *
3041 * WORK(IU) is M by M and WORK(IR) is M by M
3042 *
3043  ldwrku = m
3044  ir = iu + ldwrku*m
3045  ldwrkr = m
3046  END IF
3047  itau = ir + ldwrkr*m
3048  iwork = itau + m
3049 *
3050 * Compute A=L*Q, copying result to VT
3051 * (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3052 *
3053  CALL dgelqf( m, n, a, lda, work( itau ),
3054  $ work( iwork ), lwork-iwork+1, ierr )
3055  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3056 *
3057 * Generate Q in VT
3058 * (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3059 *
3060  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3061  $ work( iwork ), lwork-iwork+1, ierr )
3062 *
3063 * Copy L to WORK(IU), zeroing out above it
3064 *
3065  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3066  $ ldwrku )
3067  CALL dlaset( 'U', m-1, m-1, zero, zero,
3068  $ work( iu+ldwrku ), ldwrku )
3069  ie = itau
3070  itauq = ie + m
3071  itaup = itauq + m
3072  iwork = itaup + m
3073 *
3074 * Bidiagonalize L in WORK(IU), copying result to
3075 * WORK(IR)
3076 * (Workspace: need 2*M*M+4*M,
3077 * prefer 2*M*M+3*M+2*M*NB)
3078 *
3079  CALL dgebrd( m, m, work( iu ), ldwrku, s,
3080  $ work( ie ), work( itauq ),
3081  $ work( itaup ), work( iwork ),
3082  $ lwork-iwork+1, ierr )
3083  CALL dlacpy( 'L', m, m, work( iu ), ldwrku,
3084  $ work( ir ), ldwrkr )
3085 *
3086 * Generate right bidiagonalizing vectors in WORK(IU)
3087 * (Workspace: need 2*M*M+4*M-1,
3088 * prefer 2*M*M+3*M+(M-1)*NB)
3089 *
3090  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3091  $ work( itaup ), work( iwork ),
3092  $ lwork-iwork+1, ierr )
3093 *
3094 * Generate left bidiagonalizing vectors in WORK(IR)
3095 * (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3096 *
3097  CALL dorgbr( 'Q', m, m, m, work( ir ), ldwrkr,
3098  $ work( itauq ), work( iwork ),
3099  $ lwork-iwork+1, ierr )
3100  iwork = ie + m
3101 *
3102 * Perform bidiagonal QR iteration, computing left
3103 * singular vectors of L in WORK(IR) and computing
3104 * right singular vectors of L in WORK(IU)
3105 * (Workspace: need 2*M*M+BDSPAC)
3106 *
3107  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3108  $ work( iu ), ldwrku, work( ir ),
3109  $ ldwrkr, dum, 1, work( iwork ), info )
3110 *
3111 * Multiply right singular vectors of L in WORK(IU) by
3112 * Q in VT, storing result in A
3113 * (Workspace: need M*M)
3114 *
3115  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3116  $ ldwrku, vt, ldvt, zero, a, lda )
3117 *
3118 * Copy right singular vectors of A from A to VT
3119 *
3120  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3121 *
3122 * Copy left singular vectors of A from WORK(IR) to A
3123 *
3124  CALL dlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3125  $ lda )
3126 *
3127  ELSE
3128 *
3129 * Insufficient workspace for a fast algorithm
3130 *
3131  itau = 1
3132  iwork = itau + m
3133 *
3134 * Compute A=L*Q, copying result to VT
3135 * (Workspace: need 2*M, prefer M+M*NB)
3136 *
3137  CALL dgelqf( m, n, a, lda, work( itau ),
3138  $ work( iwork ), lwork-iwork+1, ierr )
3139  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3140 *
3141 * Generate Q in VT
3142 * (Workspace: need M+N, prefer M+N*NB)
3143 *
3144  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3145  $ work( iwork ), lwork-iwork+1, ierr )
3146  ie = itau
3147  itauq = ie + m
3148  itaup = itauq + m
3149  iwork = itaup + m
3150 *
3151 * Zero out above L in A
3152 *
3153  CALL dlaset( 'U', m-1, m-1, zero, zero, a( 1, 2 ),
3154  $ lda )
3155 *
3156 * Bidiagonalize L in A
3157 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3158 *
3159  CALL dgebrd( m, m, a, lda, s, work( ie ),
3160  $ work( itauq ), work( itaup ),
3161  $ work( iwork ), lwork-iwork+1, ierr )
3162 *
3163 * Multiply right bidiagonalizing vectors in A by Q
3164 * in VT
3165 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3166 *
3167  CALL dormbr( 'P', 'L', 'T', m, n, m, a, lda,
3168  $ work( itaup ), vt, ldvt,
3169  $ work( iwork ), lwork-iwork+1, ierr )
3170 *
3171 * Generate left bidiagonalizing vectors in A
3172 * (Workspace: need 4*M, prefer 3*M+M*NB)
3173 *
3174  CALL dorgbr( 'Q', m, m, m, a, lda, work( itauq ),
3175  $ work( iwork ), lwork-iwork+1, ierr )
3176  iwork = ie + m
3177 *
3178 * Perform bidiagonal QR iteration, computing left
3179 * singular vectors of A in A and computing right
3180 * singular vectors of A in VT
3181 * (Workspace: need BDSPAC)
3182 *
3183  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3184  $ ldvt, a, lda, dum, 1, work( iwork ),
3185  $ info )
3186 *
3187  END IF
3188 *
3189  ELSE IF( wntuas ) THEN
3190 *
3191 * Path 9t(N much larger than M, JOBU='S' or 'A',
3192 * JOBVT='A')
3193 * N right singular vectors to be computed in VT and
3194 * M left singular vectors to be computed in U
3195 *
3196  IF( lwork.GE.m*m+max( n+m, 4*m, bdspac ) ) THEN
3197 *
3198 * Sufficient workspace for a fast algorithm
3199 *
3200  iu = 1
3201  IF( lwork.GE.wrkbl+lda*m ) THEN
3202 *
3203 * WORK(IU) is LDA by M
3204 *
3205  ldwrku = lda
3206  ELSE
3207 *
3208 * WORK(IU) is M by M
3209 *
3210  ldwrku = m
3211  END IF
3212  itau = iu + ldwrku*m
3213  iwork = itau + m
3214 *
3215 * Compute A=L*Q, copying result to VT
3216 * (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3217 *
3218  CALL dgelqf( m, n, a, lda, work( itau ),
3219  $ work( iwork ), lwork-iwork+1, ierr )
3220  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3221 *
3222 * Generate Q in VT
3223 * (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3224 *
3225  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3226  $ work( iwork ), lwork-iwork+1, ierr )
3227 *
3228 * Copy L to WORK(IU), zeroing out above it
3229 *
3230  CALL dlacpy( 'L', m, m, a, lda, work( iu ),
3231  $ ldwrku )
3232  CALL dlaset( 'U', m-1, m-1, zero, zero,
3233  $ work( iu+ldwrku ), ldwrku )
3234  ie = itau
3235  itauq = ie + m
3236  itaup = itauq + m
3237  iwork = itaup + m
3238 *
3239 * Bidiagonalize L in WORK(IU), copying result to U
3240 * (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3241 *
3242  CALL dgebrd( m, m, work( iu ), ldwrku, s,
3243  $ work( ie ), work( itauq ),
3244  $ work( itaup ), work( iwork ),
3245  $ lwork-iwork+1, ierr )
3246  CALL dlacpy( 'L', m, m, work( iu ), ldwrku, u,
3247  $ ldu )
3248 *
3249 * Generate right bidiagonalizing vectors in WORK(IU)
3250 * (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3251 *
3252  CALL dorgbr( 'P', m, m, m, work( iu ), ldwrku,
3253  $ work( itaup ), work( iwork ),
3254  $ lwork-iwork+1, ierr )
3255 *
3256 * Generate left bidiagonalizing vectors in U
3257 * (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3258 *
3259  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3260  $ work( iwork ), lwork-iwork+1, ierr )
3261  iwork = ie + m
3262 *
3263 * Perform bidiagonal QR iteration, computing left
3264 * singular vectors of L in U and computing right
3265 * singular vectors of L in WORK(IU)
3266 * (Workspace: need M*M+BDSPAC)
3267 *
3268  CALL dbdsqr( 'U', m, m, m, 0, s, work( ie ),
3269  $ work( iu ), ldwrku, u, ldu, dum, 1,
3270  $ work( iwork ), info )
3271 *
3272 * Multiply right singular vectors of L in WORK(IU) by
3273 * Q in VT, storing result in A
3274 * (Workspace: need M*M)
3275 *
3276  CALL dgemm( 'N', 'N', m, n, m, one, work( iu ),
3277  $ ldwrku, vt, ldvt, zero, a, lda )
3278 *
3279 * Copy right singular vectors of A from A to VT
3280 *
3281  CALL dlacpy( 'F', m, n, a, lda, vt, ldvt )
3282 *
3283  ELSE
3284 *
3285 * Insufficient workspace for a fast algorithm
3286 *
3287  itau = 1
3288  iwork = itau + m
3289 *
3290 * Compute A=L*Q, copying result to VT
3291 * (Workspace: need 2*M, prefer M+M*NB)
3292 *
3293  CALL dgelqf( m, n, a, lda, work( itau ),
3294  $ work( iwork ), lwork-iwork+1, ierr )
3295  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3296 *
3297 * Generate Q in VT
3298 * (Workspace: need M+N, prefer M+N*NB)
3299 *
3300  CALL dorglq( n, n, m, vt, ldvt, work( itau ),
3301  $ work( iwork ), lwork-iwork+1, ierr )
3302 *
3303 * Copy L to U, zeroing out above it
3304 *
3305  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3306  CALL dlaset( 'U', m-1, m-1, zero, zero, u( 1, 2 ),
3307  $ ldu )
3308  ie = itau
3309  itauq = ie + m
3310  itaup = itauq + m
3311  iwork = itaup + m
3312 *
3313 * Bidiagonalize L in U
3314 * (Workspace: need 4*M, prefer 3*M+2*M*NB)
3315 *
3316  CALL dgebrd( m, m, u, ldu, s, work( ie ),
3317  $ work( itauq ), work( itaup ),
3318  $ work( iwork ), lwork-iwork+1, ierr )
3319 *
3320 * Multiply right bidiagonalizing vectors in U by Q
3321 * in VT
3322 * (Workspace: need 3*M+N, prefer 3*M+N*NB)
3323 *
3324  CALL dormbr( 'P', 'L', 'T', m, n, m, u, ldu,
3325  $ work( itaup ), vt, ldvt,
3326  $ work( iwork ), lwork-iwork+1, ierr )
3327 *
3328 * Generate left bidiagonalizing vectors in U
3329 * (Workspace: need 4*M, prefer 3*M+M*NB)
3330 *
3331  CALL dorgbr( 'Q', m, m, m, u, ldu, work( itauq ),
3332  $ work( iwork ), lwork-iwork+1, ierr )
3333  iwork = ie + m
3334 *
3335 * Perform bidiagonal QR iteration, computing left
3336 * singular vectors of A in U and computing right
3337 * singular vectors of A in VT
3338 * (Workspace: need BDSPAC)
3339 *
3340  CALL dbdsqr( 'U', m, n, m, 0, s, work( ie ), vt,
3341  $ ldvt, u, ldu, dum, 1, work( iwork ),
3342  $ info )
3343 *
3344  END IF
3345 *
3346  END IF
3347 *
3348  END IF
3349 *
3350  ELSE
3351 *
3352 * N .LT. MNTHR
3353 *
3354 * Path 10t(N greater than M, but not much larger)
3355 * Reduce to bidiagonal form without LQ decomposition
3356 *
3357  ie = 1
3358  itauq = ie + m
3359  itaup = itauq + m
3360  iwork = itaup + m
3361 *
3362 * Bidiagonalize A
3363 * (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3364 *
3365  CALL dgebrd( m, n, a, lda, s, work( ie ), work( itauq ),
3366  $ work( itaup ), work( iwork ), lwork-iwork+1,
3367  $ ierr )
3368  IF( wntuas ) THEN
3369 *
3370 * If left singular vectors desired in U, copy result to U
3371 * and generate left bidiagonalizing vectors in U
3372 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3373 *
3374  CALL dlacpy( 'L', m, m, a, lda, u, ldu )
3375  CALL dorgbr( 'Q', m, m, n, u, ldu, work( itauq ),
3376  $ work( iwork ), lwork-iwork+1, ierr )
3377  END IF
3378  IF( wntvas ) THEN
3379 *
3380 * If right singular vectors desired in VT, copy result to
3381 * VT and generate right bidiagonalizing vectors in VT
3382 * (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3383 *
3384  CALL dlacpy( 'U', m, n, a, lda, vt, ldvt )
3385  IF( wntva )
3386  $ nrvt = n
3387  IF( wntvs )
3388  $ nrvt = m
3389  CALL dorgbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3390  $ work( iwork ), lwork-iwork+1, ierr )
3391  END IF
3392  IF( wntuo ) THEN
3393 *
3394 * If left singular vectors desired in A, generate left
3395 * bidiagonalizing vectors in A
3396 * (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3397 *
3398  CALL dorgbr( 'Q', m, m, n, a, lda, work( itauq ),
3399  $ work( iwork ), lwork-iwork+1, ierr )
3400  END IF
3401  IF( wntvo ) THEN
3402 *
3403 * If right singular vectors desired in A, generate right
3404 * bidiagonalizing vectors in A
3405 * (Workspace: need 4*M, prefer 3*M+M*NB)
3406 *
3407  CALL dorgbr( 'P', m, n, m, a, lda, work( itaup ),
3408  $ work( iwork ), lwork-iwork+1, ierr )
3409  END IF
3410  iwork = ie + m
3411  IF( wntuas .OR. wntuo )
3412  $ nru = m
3413  IF( wntun )
3414  $ nru = 0
3415  IF( wntvas .OR. wntvo )
3416  $ ncvt = n
3417  IF( wntvn )
3418  $ ncvt = 0
3419  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3420 *
3421 * Perform bidiagonal QR iteration, if desired, computing
3422 * left singular vectors in U and computing right singular
3423 * vectors in VT
3424 * (Workspace: need BDSPAC)
3425 *
3426  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3427  $ ldvt, u, ldu, dum, 1, work( iwork ), info )
3428  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3429 *
3430 * Perform bidiagonal QR iteration, if desired, computing
3431 * left singular vectors in U and computing right singular
3432 * vectors in A
3433 * (Workspace: need BDSPAC)
3434 *
3435  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), a, lda,
3436  $ u, ldu, dum, 1, work( iwork ), info )
3437  ELSE
3438 *
3439 * Perform bidiagonal QR iteration, if desired, computing
3440 * left singular vectors in A and computing right singular
3441 * vectors in VT
3442 * (Workspace: need BDSPAC)
3443 *
3444  CALL dbdsqr( 'L', m, ncvt, nru, 0, s, work( ie ), vt,
3445  $ ldvt, a, lda, dum, 1, work( iwork ), info )
3446  END IF
3447 *
3448  END IF
3449 *
3450  END IF
3451 *
3452 * If DBDSQR failed to converge, copy unconverged superdiagonals
3453 * to WORK( 2:MINMN )
3454 *
3455  IF( info.NE.0 ) THEN
3456  IF( ie.GT.2 ) THEN
3457  DO 50 i = 1, minmn - 1
3458  work( i+1 ) = work( i+ie-1 )
3459  50 continue
3460  END IF
3461  IF( ie.LT.2 ) THEN
3462  DO 60 i = minmn - 1, 1, -1
3463  work( i+1 ) = work( i+ie-1 )
3464  60 continue
3465  END IF
3466  END IF
3467 *
3468 * Undo scaling if necessary
3469 *
3470  IF( iscl.EQ.1 ) THEN
3471  IF( anrm.GT.bignum )
3472  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3473  $ ierr )
3474  IF( info.NE.0 .AND. anrm.GT.bignum )
3475  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1, work( 2 ),
3476  $ minmn, ierr )
3477  IF( anrm.LT.smlnum )
3478  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3479  $ ierr )
3480  IF( info.NE.0 .AND. anrm.LT.smlnum )
3481  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1, work( 2 ),
3482  $ minmn, ierr )
3483  END IF
3484 *
3485 * Return optimal workspace in WORK(1)
3486 *
3487  work( 1 ) = maxwrk
3488 *
3489  return
3490 *
3491 * End of DGESVD
3492 *
3493  END