LAPACK  3.4.2
LAPACK: Linear Algebra PACKage
 All Files Functions Groups
zgesvd.f
Go to the documentation of this file.
1 *> \brief <b> ZGESVD computes the singular value decomposition (SVD) for GE matrices</b>
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download ZGESVD + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvd.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvd.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvd.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
22 * WORK, LWORK, RWORK, INFO )
23 *
24 * .. Scalar Arguments ..
25 * CHARACTER JOBU, JOBVT
26 * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
27 * ..
28 * .. Array Arguments ..
29 * DOUBLE PRECISION RWORK( * ), S( * )
30 * COMPLEX*16 A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
31 * $ WORK( * )
32 * ..
33 *
34 *
35 *> \par Purpose:
36 * =============
37 *>
38 *> \verbatim
39 *>
40 *> ZGESVD computes the singular value decomposition (SVD) of a complex
41 *> M-by-N matrix A, optionally computing the left and/or right singular
42 *> vectors. The SVD is written
43 *>
44 *> A = U * SIGMA * conjugate-transpose(V)
45 *>
46 *> where SIGMA is an M-by-N matrix which is zero except for its
47 *> min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
48 *> V is an N-by-N unitary matrix. The diagonal elements of SIGMA
49 *> are the singular values of A; they are real and non-negative, and
50 *> are returned in descending order. The first min(m,n) columns of
51 *> U and V are the left and right singular vectors of A.
52 *>
53 *> Note that the routine returns V**H, not V.
54 *> \endverbatim
55 *
56 * Arguments:
57 * ==========
58 *
59 *> \param[in] JOBU
60 *> \verbatim
61 *> JOBU is CHARACTER*1
62 *> Specifies options for computing all or part of the matrix U:
63 *> = 'A': all M columns of U are returned in array U:
64 *> = 'S': the first min(m,n) columns of U (the left singular
65 *> vectors) are returned in the array U;
66 *> = 'O': the first min(m,n) columns of U (the left singular
67 *> vectors) are overwritten on the array A;
68 *> = 'N': no columns of U (no left singular vectors) are
69 *> computed.
70 *> \endverbatim
71 *>
72 *> \param[in] JOBVT
73 *> \verbatim
74 *> JOBVT is CHARACTER*1
75 *> Specifies options for computing all or part of the matrix
76 *> V**H:
77 *> = 'A': all N rows of V**H are returned in the array VT;
78 *> = 'S': the first min(m,n) rows of V**H (the right singular
79 *> vectors) are returned in the array VT;
80 *> = 'O': the first min(m,n) rows of V**H (the right singular
81 *> vectors) are overwritten on the array A;
82 *> = 'N': no rows of V**H (no right singular vectors) are
83 *> computed.
84 *>
85 *> JOBVT and JOBU cannot both be 'O'.
86 *> \endverbatim
87 *>
88 *> \param[in] M
89 *> \verbatim
90 *> M is INTEGER
91 *> The number of rows of the input matrix A. M >= 0.
92 *> \endverbatim
93 *>
94 *> \param[in] N
95 *> \verbatim
96 *> N is INTEGER
97 *> The number of columns of the input matrix A. N >= 0.
98 *> \endverbatim
99 *>
100 *> \param[in,out] A
101 *> \verbatim
102 *> A is COMPLEX*16 array, dimension (LDA,N)
103 *> On entry, the M-by-N matrix A.
104 *> On exit,
105 *> if JOBU = 'O', A is overwritten with the first min(m,n)
106 *> columns of U (the left singular vectors,
107 *> stored columnwise);
108 *> if JOBVT = 'O', A is overwritten with the first min(m,n)
109 *> rows of V**H (the right singular vectors,
110 *> stored rowwise);
111 *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
112 *> are destroyed.
113 *> \endverbatim
114 *>
115 *> \param[in] LDA
116 *> \verbatim
117 *> LDA is INTEGER
118 *> The leading dimension of the array A. LDA >= max(1,M).
119 *> \endverbatim
120 *>
121 *> \param[out] S
122 *> \verbatim
123 *> S is DOUBLE PRECISION array, dimension (min(M,N))
124 *> The singular values of A, sorted so that S(i) >= S(i+1).
125 *> \endverbatim
126 *>
127 *> \param[out] U
128 *> \verbatim
129 *> U is COMPLEX*16 array, dimension (LDU,UCOL)
130 *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
131 *> If JOBU = 'A', U contains the M-by-M unitary matrix U;
132 *> if JOBU = 'S', U contains the first min(m,n) columns of U
133 *> (the left singular vectors, stored columnwise);
134 *> if JOBU = 'N' or 'O', U is not referenced.
135 *> \endverbatim
136 *>
137 *> \param[in] LDU
138 *> \verbatim
139 *> LDU is INTEGER
140 *> The leading dimension of the array U. LDU >= 1; if
141 *> JOBU = 'S' or 'A', LDU >= M.
142 *> \endverbatim
143 *>
144 *> \param[out] VT
145 *> \verbatim
146 *> VT is COMPLEX*16 array, dimension (LDVT,N)
147 *> If JOBVT = 'A', VT contains the N-by-N unitary matrix
148 *> V**H;
149 *> if JOBVT = 'S', VT contains the first min(m,n) rows of
150 *> V**H (the right singular vectors, stored rowwise);
151 *> if JOBVT = 'N' or 'O', VT is not referenced.
152 *> \endverbatim
153 *>
154 *> \param[in] LDVT
155 *> \verbatim
156 *> LDVT is INTEGER
157 *> The leading dimension of the array VT. LDVT >= 1; if
158 *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
159 *> \endverbatim
160 *>
161 *> \param[out] WORK
162 *> \verbatim
163 *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
164 *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
165 *> \endverbatim
166 *>
167 *> \param[in] LWORK
168 *> \verbatim
169 *> LWORK is INTEGER
170 *> The dimension of the array WORK.
171 *> LWORK >= MAX(1,2*MIN(M,N)+MAX(M,N)).
172 *> For good performance, LWORK should generally be larger.
173 *>
174 *> If LWORK = -1, then a workspace query is assumed; the routine
175 *> only calculates the optimal size of the WORK array, returns
176 *> this value as the first entry of the WORK array, and no error
177 *> message related to LWORK is issued by XERBLA.
178 *> \endverbatim
179 *>
180 *> \param[out] RWORK
181 *> \verbatim
182 *> RWORK is DOUBLE PRECISION array, dimension (5*min(M,N))
183 *> On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
184 *> unconverged superdiagonal elements of an upper bidiagonal
185 *> matrix B whose diagonal is in S (not necessarily sorted).
186 *> B satisfies A = U * B * VT, so it has the same singular
187 *> values as A, and singular vectors related by U and VT.
188 *> \endverbatim
189 *>
190 *> \param[out] INFO
191 *> \verbatim
192 *> INFO is INTEGER
193 *> = 0: successful exit.
194 *> < 0: if INFO = -i, the i-th argument had an illegal value.
195 *> > 0: if ZBDSQR did not converge, INFO specifies how many
196 *> superdiagonals of an intermediate bidiagonal form B
197 *> did not converge to zero. See the description of RWORK
198 *> above for details.
199 *> \endverbatim
200 *
201 * Authors:
202 * ========
203 *
204 *> \author Univ. of Tennessee
205 *> \author Univ. of California Berkeley
206 *> \author Univ. of Colorado Denver
207 *> \author NAG Ltd.
208 *
209 *> \date April 2012
210 *
211 *> \ingroup complex16GEsing
212 *
213 * =====================================================================
214  SUBROUTINE zgesvd( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
215  $ vt, ldvt, work, lwork, rwork, info )
216 *
217 * -- LAPACK driver routine (version 3.4.1) --
218 * -- LAPACK is a software package provided by Univ. of Tennessee, --
219 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
220 * April 2012
221 *
222 * .. Scalar Arguments ..
223  CHARACTER jobu, jobvt
224  INTEGER info, lda, ldu, ldvt, lwork, m, n
225 * ..
226 * .. Array Arguments ..
227  DOUBLE PRECISION rwork( * ), s( * )
228  COMPLEX*16 a( lda, * ), u( ldu, * ), vt( ldvt, * ),
229  $ work( * )
230 * ..
231 *
232 * =====================================================================
233 *
234 * .. Parameters ..
235  COMPLEX*16 czero, cone
236  parameter( czero = ( 0.0d0, 0.0d0 ),
237  $ cone = ( 1.0d0, 0.0d0 ) )
238  DOUBLE PRECISION zero, one
239  parameter( zero = 0.0d0, one = 1.0d0 )
240 * ..
241 * .. Local Scalars ..
242  LOGICAL lquery, wntua, wntuas, wntun, wntuo, wntus,
243  $ wntva, wntvas, wntvn, wntvo, wntvs
244  INTEGER blk, chunk, i, ie, ierr, ir, irwork, iscl,
245  $ itau, itaup, itauq, iu, iwork, ldwrkr, ldwrku,
246  $ maxwrk, minmn, minwrk, mnthr, ncu, ncvt, nru,
247  $ nrvt, wrkbl
248  INTEGER lwork_zgeqrf, lwork_zungqr_n, lwork_zungqr_m,
249  $ lwork_zgebrd, lwork_zungbr_p, lwork_zungbr_q,
250  $ lwork_zgelqf, lwork_zunglq_n, lwork_zunglq_m
251  DOUBLE PRECISION anrm, bignum, eps, smlnum
252 * ..
253 * .. Local Arrays ..
254  DOUBLE PRECISION dum( 1 )
255  COMPLEX*16 cdum( 1 )
256 * ..
257 * .. External Subroutines ..
258  EXTERNAL dlascl, xerbla, zbdsqr, zgebrd, zgelqf, zgemm,
260  $ zungqr, zunmbr
261 * ..
262 * .. External Functions ..
263  LOGICAL lsame
264  INTEGER ilaenv
265  DOUBLE PRECISION dlamch, zlange
266  EXTERNAL lsame, ilaenv, dlamch, zlange
267 * ..
268 * .. Intrinsic Functions ..
269  INTRINSIC max, min, sqrt
270 * ..
271 * .. Executable Statements ..
272 *
273 * Test the input arguments
274 *
275  info = 0
276  minmn = min( m, n )
277  wntua = lsame( jobu, 'A' )
278  wntus = lsame( jobu, 'S' )
279  wntuas = wntua .OR. wntus
280  wntuo = lsame( jobu, 'O' )
281  wntun = lsame( jobu, 'N' )
282  wntva = lsame( jobvt, 'A' )
283  wntvs = lsame( jobvt, 'S' )
284  wntvas = wntva .OR. wntvs
285  wntvo = lsame( jobvt, 'O' )
286  wntvn = lsame( jobvt, 'N' )
287  lquery = ( lwork.EQ.-1 )
288 *
289  IF( .NOT.( wntua .OR. wntus .OR. wntuo .OR. wntun ) ) THEN
290  info = -1
291  ELSE IF( .NOT.( wntva .OR. wntvs .OR. wntvo .OR. wntvn ) .OR.
292  $ ( wntvo .AND. wntuo ) ) THEN
293  info = -2
294  ELSE IF( m.LT.0 ) THEN
295  info = -3
296  ELSE IF( n.LT.0 ) THEN
297  info = -4
298  ELSE IF( lda.LT.max( 1, m ) ) THEN
299  info = -6
300  ELSE IF( ldu.LT.1 .OR. ( wntuas .AND. ldu.LT.m ) ) THEN
301  info = -9
302  ELSE IF( ldvt.LT.1 .OR. ( wntva .AND. ldvt.LT.n ) .OR.
303  $ ( wntvs .AND. ldvt.LT.minmn ) ) THEN
304  info = -11
305  END IF
306 *
307 * Compute workspace
308 * (Note: Comments in the code beginning "Workspace:" describe the
309 * minimal amount of workspace needed at that point in the code,
310 * as well as the preferred amount for good performance.
311 * CWorkspace refers to complex workspace, and RWorkspace to
312 * real workspace. NB refers to the optimal block size for the
313 * immediately following subroutine, as returned by ILAENV.)
314 *
315  IF( info.EQ.0 ) THEN
316  minwrk = 1
317  maxwrk = 1
318  IF( m.GE.n .AND. minmn.GT.0 ) THEN
319 *
320 * Space needed for ZBDSQR is BDSPAC = 5*N
321 *
322  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
323 * Compute space needed for ZGEQRF
324  CALL zgeqrf( m, n, a, lda, dum(1), dum(1), -1, ierr )
325  lwork_zgeqrf=dum(1)
326 * Compute space needed for ZUNGQR
327  CALL zungqr( m, n, n, a, lda, dum(1), dum(1), -1, ierr )
328  lwork_zungqr_n=dum(1)
329  CALL zungqr( m, m, n, a, lda, dum(1), dum(1), -1, ierr )
330  lwork_zungqr_m=dum(1)
331 * Compute space needed for ZGEBRD
332  CALL zgebrd( n, n, a, lda, s, dum(1), dum(1),
333  $ dum(1), dum(1), -1, ierr )
334  lwork_zgebrd=dum(1)
335 * Compute space needed for ZUNGBR
336  CALL zungbr( 'P', n, n, n, a, lda, dum(1),
337  $ dum(1), -1, ierr )
338  lwork_zungbr_p=dum(1)
339  CALL zungbr( 'Q', n, n, n, a, lda, dum(1),
340  $ dum(1), -1, ierr )
341  lwork_zungbr_q=dum(1)
342 *
343  IF( m.GE.mnthr ) THEN
344  IF( wntun ) THEN
345 *
346 * Path 1 (M much larger than N, JOBU='N')
347 *
348  maxwrk = n + lwork_zgeqrf
349  maxwrk = max( maxwrk, 2*n+lwork_zgebrd )
350  IF( wntvo .OR. wntvas )
351  $ maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
352  minwrk = 3*n
353  ELSE IF( wntuo .AND. wntvn ) THEN
354 *
355 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
356 *
357  wrkbl = n + lwork_zgeqrf
358  wrkbl = max( wrkbl, n+lwork_zungqr_n )
359  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
360  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
361  maxwrk = max( n*n+wrkbl, n*n+m*n )
362  minwrk = 2*n + m
363  ELSE IF( wntuo .AND. wntvas ) THEN
364 *
365 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
366 * 'A')
367 *
368  wrkbl = n + lwork_zgeqrf
369  wrkbl = max( wrkbl, n+lwork_zungqr_n )
370  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
371  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
372  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
373  maxwrk = max( n*n+wrkbl, n*n+m*n )
374  minwrk = 2*n + m
375  ELSE IF( wntus .AND. wntvn ) THEN
376 *
377 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
378 *
379  wrkbl = n + lwork_zgeqrf
380  wrkbl = max( wrkbl, n+lwork_zungqr_n )
381  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
382  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
383  maxwrk = n*n + wrkbl
384  minwrk = 2*n + m
385  ELSE IF( wntus .AND. wntvo ) THEN
386 *
387 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
388 *
389  wrkbl = n + lwork_zgeqrf
390  wrkbl = max( wrkbl, n+lwork_zungqr_n )
391  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
392  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
393  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
394  maxwrk = 2*n*n + wrkbl
395  minwrk = 2*n + m
396  ELSE IF( wntus .AND. wntvas ) THEN
397 *
398 * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
399 * 'A')
400 *
401  wrkbl = n + lwork_zgeqrf
402  wrkbl = max( wrkbl, n+lwork_zungqr_n )
403  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
404  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
405  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
406  maxwrk = n*n + wrkbl
407  minwrk = 2*n + m
408  ELSE IF( wntua .AND. wntvn ) THEN
409 *
410 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
411 *
412  wrkbl = n + lwork_zgeqrf
413  wrkbl = max( wrkbl, n+lwork_zungqr_m )
414  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
415  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
416  maxwrk = n*n + wrkbl
417  minwrk = 2*n + m
418  ELSE IF( wntua .AND. wntvo ) THEN
419 *
420 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
421 *
422  wrkbl = n + lwork_zgeqrf
423  wrkbl = max( wrkbl, n+lwork_zungqr_m )
424  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
425  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
426  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
427  maxwrk = 2*n*n + wrkbl
428  minwrk = 2*n + m
429  ELSE IF( wntua .AND. wntvas ) THEN
430 *
431 * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
432 * 'A')
433 *
434  wrkbl = n + lwork_zgeqrf
435  wrkbl = max( wrkbl, n+lwork_zungqr_m )
436  wrkbl = max( wrkbl, 2*n+lwork_zgebrd )
437  wrkbl = max( wrkbl, 2*n+lwork_zungbr_q )
438  wrkbl = max( wrkbl, 2*n+lwork_zungbr_p )
439  maxwrk = n*n + wrkbl
440  minwrk = 2*n + m
441  END IF
442  ELSE
443 *
444 * Path 10 (M at least N, but not much larger)
445 *
446  CALL zgebrd( m, n, a, lda, s, dum(1), dum(1),
447  $ dum(1), dum(1), -1, ierr )
448  lwork_zgebrd=dum(1)
449  maxwrk = 2*n + lwork_zgebrd
450  IF( wntus .OR. wntuo ) THEN
451  CALL zungbr( 'Q', m, n, n, a, lda, dum(1),
452  $ dum(1), -1, ierr )
453  lwork_zungbr_q=dum(1)
454  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
455  END IF
456  IF( wntua ) THEN
457  CALL zungbr( 'Q', m, m, n, a, lda, dum(1),
458  $ dum(1), -1, ierr )
459  lwork_zungbr_q=dum(1)
460  maxwrk = max( maxwrk, 2*n+lwork_zungbr_q )
461  END IF
462  IF( .NOT.wntvn ) THEN
463  maxwrk = max( maxwrk, 2*n+lwork_zungbr_p )
464  minwrk = 2*n + m
465  END IF
466  END IF
467  ELSE IF( minmn.GT.0 ) THEN
468 *
469 * Space needed for ZBDSQR is BDSPAC = 5*M
470 *
471  mnthr = ilaenv( 6, 'ZGESVD', jobu // jobvt, m, n, 0, 0 )
472 * Compute space needed for ZGELQF
473  CALL zgelqf( m, n, a, lda, dum(1), dum(1), -1, ierr )
474  lwork_zgelqf=dum(1)
475 * Compute space needed for ZUNGLQ
476  CALL zunglq( n, n, m, dum(1), n, dum(1), dum(1), -1, ierr )
477  lwork_zunglq_n=dum(1)
478  CALL zunglq( m, n, m, a, lda, dum(1), dum(1), -1, ierr )
479  lwork_zunglq_m=dum(1)
480 * Compute space needed for ZGEBRD
481  CALL zgebrd( m, m, a, lda, s, dum(1), dum(1),
482  $ dum(1), dum(1), -1, ierr )
483  lwork_zgebrd=dum(1)
484 * Compute space needed for ZUNGBR P
485  CALL zungbr( 'P', m, m, m, a, n, dum(1),
486  $ dum(1), -1, ierr )
487  lwork_zungbr_p=dum(1)
488 * Compute space needed for ZUNGBR Q
489  CALL zungbr( 'Q', m, m, m, a, n, dum(1),
490  $ dum(1), -1, ierr )
491  lwork_zungbr_q=dum(1)
492  IF( n.GE.mnthr ) THEN
493  IF( wntvn ) THEN
494 *
495 * Path 1t(N much larger than M, JOBVT='N')
496 *
497  maxwrk = m + lwork_zgelqf
498  maxwrk = max( maxwrk, 2*m+lwork_zgebrd )
499  IF( wntuo .OR. wntuas )
500  $ maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
501  minwrk = 3*m
502  ELSE IF( wntvo .AND. wntun ) THEN
503 *
504 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
505 *
506  wrkbl = m + lwork_zgelqf
507  wrkbl = max( wrkbl, m+lwork_zunglq_m )
508  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
509  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
510  maxwrk = max( m*m+wrkbl, m*m+m*n )
511  minwrk = 2*m + n
512  ELSE IF( wntvo .AND. wntuas ) THEN
513 *
514 * Path 3t(N much larger than M, JOBU='S' or 'A',
515 * JOBVT='O')
516 *
517  wrkbl = m + lwork_zgelqf
518  wrkbl = max( wrkbl, m+lwork_zunglq_m )
519  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
520  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
521  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
522  maxwrk = max( m*m+wrkbl, m*m+m*n )
523  minwrk = 2*m + n
524  ELSE IF( wntvs .AND. wntun ) THEN
525 *
526 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
527 *
528  wrkbl = m + lwork_zgelqf
529  wrkbl = max( wrkbl, m+lwork_zunglq_m )
530  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
531  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
532  maxwrk = m*m + wrkbl
533  minwrk = 2*m + n
534  ELSE IF( wntvs .AND. wntuo ) THEN
535 *
536 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
537 *
538  wrkbl = m + lwork_zgelqf
539  wrkbl = max( wrkbl, m+lwork_zunglq_m )
540  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
541  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
542  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
543  maxwrk = 2*m*m + wrkbl
544  minwrk = 2*m + n
545  ELSE IF( wntvs .AND. wntuas ) THEN
546 *
547 * Path 6t(N much larger than M, JOBU='S' or 'A',
548 * JOBVT='S')
549 *
550  wrkbl = m + lwork_zgelqf
551  wrkbl = max( wrkbl, m+lwork_zunglq_m )
552  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
553  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
554  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
555  maxwrk = m*m + wrkbl
556  minwrk = 2*m + n
557  ELSE IF( wntva .AND. wntun ) THEN
558 *
559 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
560 *
561  wrkbl = m + lwork_zgelqf
562  wrkbl = max( wrkbl, m+lwork_zunglq_n )
563  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
564  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
565  maxwrk = m*m + wrkbl
566  minwrk = 2*m + n
567  ELSE IF( wntva .AND. wntuo ) THEN
568 *
569 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
570 *
571  wrkbl = m + lwork_zgelqf
572  wrkbl = max( wrkbl, m+lwork_zunglq_n )
573  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
574  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
575  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
576  maxwrk = 2*m*m + wrkbl
577  minwrk = 2*m + n
578  ELSE IF( wntva .AND. wntuas ) THEN
579 *
580 * Path 9t(N much larger than M, JOBU='S' or 'A',
581 * JOBVT='A')
582 *
583  wrkbl = m + lwork_zgelqf
584  wrkbl = max( wrkbl, m+lwork_zunglq_n )
585  wrkbl = max( wrkbl, 2*m+lwork_zgebrd )
586  wrkbl = max( wrkbl, 2*m+lwork_zungbr_p )
587  wrkbl = max( wrkbl, 2*m+lwork_zungbr_q )
588  maxwrk = m*m + wrkbl
589  minwrk = 2*m + n
590  END IF
591  ELSE
592 *
593 * Path 10t(N greater than M, but not much larger)
594 *
595  CALL zgebrd( m, n, a, lda, s, dum(1), dum(1),
596  $ dum(1), dum(1), -1, ierr )
597  lwork_zgebrd=dum(1)
598  maxwrk = 2*m + lwork_zgebrd
599  IF( wntvs .OR. wntvo ) THEN
600 * Compute space needed for ZUNGBR P
601  CALL zungbr( 'P', m, n, m, a, n, dum(1),
602  $ dum(1), -1, ierr )
603  lwork_zungbr_p=dum(1)
604  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
605  END IF
606  IF( wntva ) THEN
607  CALL zungbr( 'P', n, n, m, a, n, dum(1),
608  $ dum(1), -1, ierr )
609  lwork_zungbr_p=dum(1)
610  maxwrk = max( maxwrk, 2*m+lwork_zungbr_p )
611  END IF
612  IF( .NOT.wntun ) THEN
613  maxwrk = max( maxwrk, 2*m+lwork_zungbr_q )
614  minwrk = 2*m + n
615  END IF
616  END IF
617  END IF
618  maxwrk = max( maxwrk, minwrk )
619  work( 1 ) = maxwrk
620 *
621  IF( lwork.LT.minwrk .AND. .NOT.lquery ) THEN
622  info = -13
623  END IF
624  END IF
625 *
626  IF( info.NE.0 ) THEN
627  CALL xerbla( 'ZGESVD', -info )
628  return
629  ELSE IF( lquery ) THEN
630  return
631  END IF
632 *
633 * Quick return if possible
634 *
635  IF( m.EQ.0 .OR. n.EQ.0 ) THEN
636  return
637  END IF
638 *
639 * Get machine constants
640 *
641  eps = dlamch( 'P' )
642  smlnum = sqrt( dlamch( 'S' ) ) / eps
643  bignum = one / smlnum
644 *
645 * Scale A if max element outside range [SMLNUM,BIGNUM]
646 *
647  anrm = zlange( 'M', m, n, a, lda, dum )
648  iscl = 0
649  IF( anrm.GT.zero .AND. anrm.LT.smlnum ) THEN
650  iscl = 1
651  CALL zlascl( 'G', 0, 0, anrm, smlnum, m, n, a, lda, ierr )
652  ELSE IF( anrm.GT.bignum ) THEN
653  iscl = 1
654  CALL zlascl( 'G', 0, 0, anrm, bignum, m, n, a, lda, ierr )
655  END IF
656 *
657  IF( m.GE.n ) THEN
658 *
659 * A has at least as many rows as columns. If A has sufficiently
660 * more rows than columns, first reduce using the QR
661 * decomposition (if sufficient workspace available)
662 *
663  IF( m.GE.mnthr ) THEN
664 *
665  IF( wntun ) THEN
666 *
667 * Path 1 (M much larger than N, JOBU='N')
668 * No left singular vectors to be computed
669 *
670  itau = 1
671  iwork = itau + n
672 *
673 * Compute A=Q*R
674 * (CWorkspace: need 2*N, prefer N+N*NB)
675 * (RWorkspace: need 0)
676 *
677  CALL zgeqrf( m, n, a, lda, work( itau ), work( iwork ),
678  $ lwork-iwork+1, ierr )
679 *
680 * Zero out below R
681 *
682  CALL zlaset( 'L', n-1, n-1, czero, czero, a( 2, 1 ),
683  $ lda )
684  ie = 1
685  itauq = 1
686  itaup = itauq + n
687  iwork = itaup + n
688 *
689 * Bidiagonalize R in A
690 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
691 * (RWorkspace: need N)
692 *
693  CALL zgebrd( n, n, a, lda, s, rwork( ie ), work( itauq ),
694  $ work( itaup ), work( iwork ), lwork-iwork+1,
695  $ ierr )
696  ncvt = 0
697  IF( wntvo .OR. wntvas ) THEN
698 *
699 * If right singular vectors desired, generate P'.
700 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
701 * (RWorkspace: 0)
702 *
703  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
704  $ work( iwork ), lwork-iwork+1, ierr )
705  ncvt = n
706  END IF
707  irwork = ie + n
708 *
709 * Perform bidiagonal QR iteration, computing right
710 * singular vectors of A in A if desired
711 * (CWorkspace: 0)
712 * (RWorkspace: need BDSPAC)
713 *
714  CALL zbdsqr( 'U', n, ncvt, 0, 0, s, rwork( ie ), a, lda,
715  $ cdum, 1, cdum, 1, rwork( irwork ), info )
716 *
717 * If right singular vectors desired in VT, copy them there
718 *
719  IF( wntvas )
720  $ CALL zlacpy( 'F', n, n, a, lda, vt, ldvt )
721 *
722  ELSE IF( wntuo .AND. wntvn ) THEN
723 *
724 * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
725 * N left singular vectors to be overwritten on A and
726 * no right singular vectors to be computed
727 *
728  IF( lwork.GE.n*n+3*n ) THEN
729 *
730 * Sufficient workspace for a fast algorithm
731 *
732  ir = 1
733  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
734 *
735 * WORK(IU) is LDA by N, WORK(IR) is LDA by N
736 *
737  ldwrku = lda
738  ldwrkr = lda
739  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
740 *
741 * WORK(IU) is LDA by N, WORK(IR) is N by N
742 *
743  ldwrku = lda
744  ldwrkr = n
745  ELSE
746 *
747 * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
748 *
749  ldwrku = ( lwork-n*n ) / n
750  ldwrkr = n
751  END IF
752  itau = ir + ldwrkr*n
753  iwork = itau + n
754 *
755 * Compute A=Q*R
756 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
757 * (RWorkspace: 0)
758 *
759  CALL zgeqrf( m, n, a, lda, work( itau ),
760  $ work( iwork ), lwork-iwork+1, ierr )
761 *
762 * Copy R to WORK(IR) and zero out below it
763 *
764  CALL zlacpy( 'U', n, n, a, lda, work( ir ), ldwrkr )
765  CALL zlaset( 'L', n-1, n-1, czero, czero,
766  $ work( ir+1 ), ldwrkr )
767 *
768 * Generate Q in A
769 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
770 * (RWorkspace: 0)
771 *
772  CALL zungqr( m, n, n, a, lda, work( itau ),
773  $ work( iwork ), lwork-iwork+1, ierr )
774  ie = 1
775  itauq = itau
776  itaup = itauq + n
777  iwork = itaup + n
778 *
779 * Bidiagonalize R in WORK(IR)
780 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
781 * (RWorkspace: need N)
782 *
783  CALL zgebrd( n, n, work( ir ), ldwrkr, s, rwork( ie ),
784  $ work( itauq ), work( itaup ),
785  $ work( iwork ), lwork-iwork+1, ierr )
786 *
787 * Generate left vectors bidiagonalizing R
788 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
789 * (RWorkspace: need 0)
790 *
791  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
792  $ work( itauq ), work( iwork ),
793  $ lwork-iwork+1, ierr )
794  irwork = ie + n
795 *
796 * Perform bidiagonal QR iteration, computing left
797 * singular vectors of R in WORK(IR)
798 * (CWorkspace: need N*N)
799 * (RWorkspace: need BDSPAC)
800 *
801  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum, 1,
802  $ work( ir ), ldwrkr, cdum, 1,
803  $ rwork( irwork ), info )
804  iu = itauq
805 *
806 * Multiply Q in A by left singular vectors of R in
807 * WORK(IR), storing result in WORK(IU) and copying to A
808 * (CWorkspace: need N*N+N, prefer N*N+M*N)
809 * (RWorkspace: 0)
810 *
811  DO 10 i = 1, m, ldwrku
812  chunk = min( m-i+1, ldwrku )
813  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
814  $ lda, work( ir ), ldwrkr, czero,
815  $ work( iu ), ldwrku )
816  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
817  $ a( i, 1 ), lda )
818  10 continue
819 *
820  ELSE
821 *
822 * Insufficient workspace for a fast algorithm
823 *
824  ie = 1
825  itauq = 1
826  itaup = itauq + n
827  iwork = itaup + n
828 *
829 * Bidiagonalize A
830 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
831 * (RWorkspace: N)
832 *
833  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
834  $ work( itauq ), work( itaup ),
835  $ work( iwork ), lwork-iwork+1, ierr )
836 *
837 * Generate left vectors bidiagonalizing A
838 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
839 * (RWorkspace: 0)
840 *
841  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
842  $ work( iwork ), lwork-iwork+1, ierr )
843  irwork = ie + n
844 *
845 * Perform bidiagonal QR iteration, computing left
846 * singular vectors of A in A
847 * (CWorkspace: need 0)
848 * (RWorkspace: need BDSPAC)
849 *
850  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum, 1,
851  $ a, lda, cdum, 1, rwork( irwork ), info )
852 *
853  END IF
854 *
855  ELSE IF( wntuo .AND. wntvas ) THEN
856 *
857 * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
858 * N left singular vectors to be overwritten on A and
859 * N right singular vectors to be computed in VT
860 *
861  IF( lwork.GE.n*n+3*n ) THEN
862 *
863 * Sufficient workspace for a fast algorithm
864 *
865  ir = 1
866  IF( lwork.GE.max( wrkbl, lda*n )+lda*n ) THEN
867 *
868 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
869 *
870  ldwrku = lda
871  ldwrkr = lda
872  ELSE IF( lwork.GE.max( wrkbl, lda*n )+n*n ) THEN
873 *
874 * WORK(IU) is LDA by N and WORK(IR) is N by N
875 *
876  ldwrku = lda
877  ldwrkr = n
878  ELSE
879 *
880 * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
881 *
882  ldwrku = ( lwork-n*n ) / n
883  ldwrkr = n
884  END IF
885  itau = ir + ldwrkr*n
886  iwork = itau + n
887 *
888 * Compute A=Q*R
889 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
890 * (RWorkspace: 0)
891 *
892  CALL zgeqrf( m, n, a, lda, work( itau ),
893  $ work( iwork ), lwork-iwork+1, ierr )
894 *
895 * Copy R to VT, zeroing out below it
896 *
897  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
898  IF( n.GT.1 )
899  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
900  $ vt( 2, 1 ), ldvt )
901 *
902 * Generate Q in A
903 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
904 * (RWorkspace: 0)
905 *
906  CALL zungqr( m, n, n, a, lda, work( itau ),
907  $ work( iwork ), lwork-iwork+1, ierr )
908  ie = 1
909  itauq = itau
910  itaup = itauq + n
911  iwork = itaup + n
912 *
913 * Bidiagonalize R in VT, copying result to WORK(IR)
914 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
915 * (RWorkspace: need N)
916 *
917  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
918  $ work( itauq ), work( itaup ),
919  $ work( iwork ), lwork-iwork+1, ierr )
920  CALL zlacpy( 'L', n, n, vt, ldvt, work( ir ), ldwrkr )
921 *
922 * Generate left vectors bidiagonalizing R in WORK(IR)
923 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
924 * (RWorkspace: 0)
925 *
926  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
927  $ work( itauq ), work( iwork ),
928  $ lwork-iwork+1, ierr )
929 *
930 * Generate right vectors bidiagonalizing R in VT
931 * (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
932 * (RWorkspace: 0)
933 *
934  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
935  $ work( iwork ), lwork-iwork+1, ierr )
936  irwork = ie + n
937 *
938 * Perform bidiagonal QR iteration, computing left
939 * singular vectors of R in WORK(IR) and computing right
940 * singular vectors of R in VT
941 * (CWorkspace: need N*N)
942 * (RWorkspace: need BDSPAC)
943 *
944  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
945  $ ldvt, work( ir ), ldwrkr, cdum, 1,
946  $ rwork( irwork ), info )
947  iu = itauq
948 *
949 * Multiply Q in A by left singular vectors of R in
950 * WORK(IR), storing result in WORK(IU) and copying to A
951 * (CWorkspace: need N*N+N, prefer N*N+M*N)
952 * (RWorkspace: 0)
953 *
954  DO 20 i = 1, m, ldwrku
955  chunk = min( m-i+1, ldwrku )
956  CALL zgemm( 'N', 'N', chunk, n, n, cone, a( i, 1 ),
957  $ lda, work( ir ), ldwrkr, czero,
958  $ work( iu ), ldwrku )
959  CALL zlacpy( 'F', chunk, n, work( iu ), ldwrku,
960  $ a( i, 1 ), lda )
961  20 continue
962 *
963  ELSE
964 *
965 * Insufficient workspace for a fast algorithm
966 *
967  itau = 1
968  iwork = itau + n
969 *
970 * Compute A=Q*R
971 * (CWorkspace: need 2*N, prefer N+N*NB)
972 * (RWorkspace: 0)
973 *
974  CALL zgeqrf( m, n, a, lda, work( itau ),
975  $ work( iwork ), lwork-iwork+1, ierr )
976 *
977 * Copy R to VT, zeroing out below it
978 *
979  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
980  IF( n.GT.1 )
981  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
982  $ vt( 2, 1 ), ldvt )
983 *
984 * Generate Q in A
985 * (CWorkspace: need 2*N, prefer N+N*NB)
986 * (RWorkspace: 0)
987 *
988  CALL zungqr( m, n, n, a, lda, work( itau ),
989  $ work( iwork ), lwork-iwork+1, ierr )
990  ie = 1
991  itauq = itau
992  itaup = itauq + n
993  iwork = itaup + n
994 *
995 * Bidiagonalize R in VT
996 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
997 * (RWorkspace: N)
998 *
999  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1000  $ work( itauq ), work( itaup ),
1001  $ work( iwork ), lwork-iwork+1, ierr )
1002 *
1003 * Multiply Q in A by left vectors bidiagonalizing R
1004 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1005 * (RWorkspace: 0)
1006 *
1007  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1008  $ work( itauq ), a, lda, work( iwork ),
1009  $ lwork-iwork+1, ierr )
1010 *
1011 * Generate right vectors bidiagonalizing R in VT
1012 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1013 * (RWorkspace: 0)
1014 *
1015  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1016  $ work( iwork ), lwork-iwork+1, ierr )
1017  irwork = ie + n
1018 *
1019 * Perform bidiagonal QR iteration, computing left
1020 * singular vectors of A in A and computing right
1021 * singular vectors of A in VT
1022 * (CWorkspace: 0)
1023 * (RWorkspace: need BDSPAC)
1024 *
1025  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1026  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
1027  $ info )
1028 *
1029  END IF
1030 *
1031  ELSE IF( wntus ) THEN
1032 *
1033  IF( wntvn ) THEN
1034 *
1035 * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
1036 * N left singular vectors to be computed in U and
1037 * no right singular vectors to be computed
1038 *
1039  IF( lwork.GE.n*n+3*n ) THEN
1040 *
1041 * Sufficient workspace for a fast algorithm
1042 *
1043  ir = 1
1044  IF( lwork.GE.wrkbl+lda*n ) THEN
1045 *
1046 * WORK(IR) is LDA by N
1047 *
1048  ldwrkr = lda
1049  ELSE
1050 *
1051 * WORK(IR) is N by N
1052 *
1053  ldwrkr = n
1054  END IF
1055  itau = ir + ldwrkr*n
1056  iwork = itau + n
1057 *
1058 * Compute A=Q*R
1059 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1060 * (RWorkspace: 0)
1061 *
1062  CALL zgeqrf( m, n, a, lda, work( itau ),
1063  $ work( iwork ), lwork-iwork+1, ierr )
1064 *
1065 * Copy R to WORK(IR), zeroing out below it
1066 *
1067  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1068  $ ldwrkr )
1069  CALL zlaset( 'L', n-1, n-1, czero, czero,
1070  $ work( ir+1 ), ldwrkr )
1071 *
1072 * Generate Q in A
1073 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1074 * (RWorkspace: 0)
1075 *
1076  CALL zungqr( m, n, n, a, lda, work( itau ),
1077  $ work( iwork ), lwork-iwork+1, ierr )
1078  ie = 1
1079  itauq = itau
1080  itaup = itauq + n
1081  iwork = itaup + n
1082 *
1083 * Bidiagonalize R in WORK(IR)
1084 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1085 * (RWorkspace: need N)
1086 *
1087  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1088  $ rwork( ie ), work( itauq ),
1089  $ work( itaup ), work( iwork ),
1090  $ lwork-iwork+1, ierr )
1091 *
1092 * Generate left vectors bidiagonalizing R in WORK(IR)
1093 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1094 * (RWorkspace: 0)
1095 *
1096  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1097  $ work( itauq ), work( iwork ),
1098  $ lwork-iwork+1, ierr )
1099  irwork = ie + n
1100 *
1101 * Perform bidiagonal QR iteration, computing left
1102 * singular vectors of R in WORK(IR)
1103 * (CWorkspace: need N*N)
1104 * (RWorkspace: need BDSPAC)
1105 *
1106  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1107  $ 1, work( ir ), ldwrkr, cdum, 1,
1108  $ rwork( irwork ), info )
1109 *
1110 * Multiply Q in A by left singular vectors of R in
1111 * WORK(IR), storing result in U
1112 * (CWorkspace: need N*N)
1113 * (RWorkspace: 0)
1114 *
1115  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1116  $ work( ir ), ldwrkr, czero, u, ldu )
1117 *
1118  ELSE
1119 *
1120 * Insufficient workspace for a fast algorithm
1121 *
1122  itau = 1
1123  iwork = itau + n
1124 *
1125 * Compute A=Q*R, copying result to U
1126 * (CWorkspace: need 2*N, prefer N+N*NB)
1127 * (RWorkspace: 0)
1128 *
1129  CALL zgeqrf( m, n, a, lda, work( itau ),
1130  $ work( iwork ), lwork-iwork+1, ierr )
1131  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1132 *
1133 * Generate Q in U
1134 * (CWorkspace: need 2*N, prefer N+N*NB)
1135 * (RWorkspace: 0)
1136 *
1137  CALL zungqr( m, n, n, u, ldu, work( itau ),
1138  $ work( iwork ), lwork-iwork+1, ierr )
1139  ie = 1
1140  itauq = itau
1141  itaup = itauq + n
1142  iwork = itaup + n
1143 *
1144 * Zero out below R in A
1145 *
1146  CALL zlaset( 'L', n-1, n-1, czero, czero,
1147  $ a( 2, 1 ), lda )
1148 *
1149 * Bidiagonalize R in A
1150 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1151 * (RWorkspace: need N)
1152 *
1153  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1154  $ work( itauq ), work( itaup ),
1155  $ work( iwork ), lwork-iwork+1, ierr )
1156 *
1157 * Multiply Q in U by left vectors bidiagonalizing R
1158 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1159 * (RWorkspace: 0)
1160 *
1161  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1162  $ work( itauq ), u, ldu, work( iwork ),
1163  $ lwork-iwork+1, ierr )
1164  irwork = ie + n
1165 *
1166 * Perform bidiagonal QR iteration, computing left
1167 * singular vectors of A in U
1168 * (CWorkspace: 0)
1169 * (RWorkspace: need BDSPAC)
1170 *
1171  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1172  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1173  $ info )
1174 *
1175  END IF
1176 *
1177  ELSE IF( wntvo ) THEN
1178 *
1179 * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1180 * N left singular vectors to be computed in U and
1181 * N right singular vectors to be overwritten on A
1182 *
1183  IF( lwork.GE.2*n*n+3*n ) THEN
1184 *
1185 * Sufficient workspace for a fast algorithm
1186 *
1187  iu = 1
1188  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1189 *
1190 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1191 *
1192  ldwrku = lda
1193  ir = iu + ldwrku*n
1194  ldwrkr = lda
1195  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1196 *
1197 * WORK(IU) is LDA by N and WORK(IR) is N by N
1198 *
1199  ldwrku = lda
1200  ir = iu + ldwrku*n
1201  ldwrkr = n
1202  ELSE
1203 *
1204 * WORK(IU) is N by N and WORK(IR) is N by N
1205 *
1206  ldwrku = n
1207  ir = iu + ldwrku*n
1208  ldwrkr = n
1209  END IF
1210  itau = ir + ldwrkr*n
1211  iwork = itau + n
1212 *
1213 * Compute A=Q*R
1214 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1215 * (RWorkspace: 0)
1216 *
1217  CALL zgeqrf( m, n, a, lda, work( itau ),
1218  $ work( iwork ), lwork-iwork+1, ierr )
1219 *
1220 * Copy R to WORK(IU), zeroing out below it
1221 *
1222  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1223  $ ldwrku )
1224  CALL zlaset( 'L', n-1, n-1, czero, czero,
1225  $ work( iu+1 ), ldwrku )
1226 *
1227 * Generate Q in A
1228 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1229 * (RWorkspace: 0)
1230 *
1231  CALL zungqr( m, n, n, a, lda, work( itau ),
1232  $ work( iwork ), lwork-iwork+1, ierr )
1233  ie = 1
1234  itauq = itau
1235  itaup = itauq + n
1236  iwork = itaup + n
1237 *
1238 * Bidiagonalize R in WORK(IU), copying result to
1239 * WORK(IR)
1240 * (CWorkspace: need 2*N*N+3*N,
1241 * prefer 2*N*N+2*N+2*N*NB)
1242 * (RWorkspace: need N)
1243 *
1244  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1245  $ rwork( ie ), work( itauq ),
1246  $ work( itaup ), work( iwork ),
1247  $ lwork-iwork+1, ierr )
1248  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1249  $ work( ir ), ldwrkr )
1250 *
1251 * Generate left bidiagonalizing vectors in WORK(IU)
1252 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1253 * (RWorkspace: 0)
1254 *
1255  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1256  $ work( itauq ), work( iwork ),
1257  $ lwork-iwork+1, ierr )
1258 *
1259 * Generate right bidiagonalizing vectors in WORK(IR)
1260 * (CWorkspace: need 2*N*N+3*N-1,
1261 * prefer 2*N*N+2*N+(N-1)*NB)
1262 * (RWorkspace: 0)
1263 *
1264  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1265  $ work( itaup ), work( iwork ),
1266  $ lwork-iwork+1, ierr )
1267  irwork = ie + n
1268 *
1269 * Perform bidiagonal QR iteration, computing left
1270 * singular vectors of R in WORK(IU) and computing
1271 * right singular vectors of R in WORK(IR)
1272 * (CWorkspace: need 2*N*N)
1273 * (RWorkspace: need BDSPAC)
1274 *
1275  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1276  $ work( ir ), ldwrkr, work( iu ),
1277  $ ldwrku, cdum, 1, rwork( irwork ),
1278  $ info )
1279 *
1280 * Multiply Q in A by left singular vectors of R in
1281 * WORK(IU), storing result in U
1282 * (CWorkspace: need N*N)
1283 * (RWorkspace: 0)
1284 *
1285  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1286  $ work( iu ), ldwrku, czero, u, ldu )
1287 *
1288 * Copy right singular vectors of R to A
1289 * (CWorkspace: need N*N)
1290 * (RWorkspace: 0)
1291 *
1292  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1293  $ lda )
1294 *
1295  ELSE
1296 *
1297 * Insufficient workspace for a fast algorithm
1298 *
1299  itau = 1
1300  iwork = itau + n
1301 *
1302 * Compute A=Q*R, copying result to U
1303 * (CWorkspace: need 2*N, prefer N+N*NB)
1304 * (RWorkspace: 0)
1305 *
1306  CALL zgeqrf( m, n, a, lda, work( itau ),
1307  $ work( iwork ), lwork-iwork+1, ierr )
1308  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1309 *
1310 * Generate Q in U
1311 * (CWorkspace: need 2*N, prefer N+N*NB)
1312 * (RWorkspace: 0)
1313 *
1314  CALL zungqr( m, n, n, u, ldu, work( itau ),
1315  $ work( iwork ), lwork-iwork+1, ierr )
1316  ie = 1
1317  itauq = itau
1318  itaup = itauq + n
1319  iwork = itaup + n
1320 *
1321 * Zero out below R in A
1322 *
1323  CALL zlaset( 'L', n-1, n-1, czero, czero,
1324  $ a( 2, 1 ), lda )
1325 *
1326 * Bidiagonalize R in A
1327 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1328 * (RWorkspace: need N)
1329 *
1330  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1331  $ work( itauq ), work( itaup ),
1332  $ work( iwork ), lwork-iwork+1, ierr )
1333 *
1334 * Multiply Q in U by left vectors bidiagonalizing R
1335 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1336 * (RWorkspace: 0)
1337 *
1338  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1339  $ work( itauq ), u, ldu, work( iwork ),
1340  $ lwork-iwork+1, ierr )
1341 *
1342 * Generate right vectors bidiagonalizing R in A
1343 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1344 * (RWorkspace: 0)
1345 *
1346  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1347  $ work( iwork ), lwork-iwork+1, ierr )
1348  irwork = ie + n
1349 *
1350 * Perform bidiagonal QR iteration, computing left
1351 * singular vectors of A in U and computing right
1352 * singular vectors of A in A
1353 * (CWorkspace: 0)
1354 * (RWorkspace: need BDSPAC)
1355 *
1356  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1357  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1358  $ info )
1359 *
1360  END IF
1361 *
1362  ELSE IF( wntvas ) THEN
1363 *
1364 * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1365 * or 'A')
1366 * N left singular vectors to be computed in U and
1367 * N right singular vectors to be computed in VT
1368 *
1369  IF( lwork.GE.n*n+3*n ) THEN
1370 *
1371 * Sufficient workspace for a fast algorithm
1372 *
1373  iu = 1
1374  IF( lwork.GE.wrkbl+lda*n ) THEN
1375 *
1376 * WORK(IU) is LDA by N
1377 *
1378  ldwrku = lda
1379  ELSE
1380 *
1381 * WORK(IU) is N by N
1382 *
1383  ldwrku = n
1384  END IF
1385  itau = iu + ldwrku*n
1386  iwork = itau + n
1387 *
1388 * Compute A=Q*R
1389 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1390 * (RWorkspace: 0)
1391 *
1392  CALL zgeqrf( m, n, a, lda, work( itau ),
1393  $ work( iwork ), lwork-iwork+1, ierr )
1394 *
1395 * Copy R to WORK(IU), zeroing out below it
1396 *
1397  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1398  $ ldwrku )
1399  CALL zlaset( 'L', n-1, n-1, czero, czero,
1400  $ work( iu+1 ), ldwrku )
1401 *
1402 * Generate Q in A
1403 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1404 * (RWorkspace: 0)
1405 *
1406  CALL zungqr( m, n, n, a, lda, work( itau ),
1407  $ work( iwork ), lwork-iwork+1, ierr )
1408  ie = 1
1409  itauq = itau
1410  itaup = itauq + n
1411  iwork = itaup + n
1412 *
1413 * Bidiagonalize R in WORK(IU), copying result to VT
1414 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1415 * (RWorkspace: need N)
1416 *
1417  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1418  $ rwork( ie ), work( itauq ),
1419  $ work( itaup ), work( iwork ),
1420  $ lwork-iwork+1, ierr )
1421  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1422  $ ldvt )
1423 *
1424 * Generate left bidiagonalizing vectors in WORK(IU)
1425 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1426 * (RWorkspace: 0)
1427 *
1428  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1429  $ work( itauq ), work( iwork ),
1430  $ lwork-iwork+1, ierr )
1431 *
1432 * Generate right bidiagonalizing vectors in VT
1433 * (CWorkspace: need N*N+3*N-1,
1434 * prefer N*N+2*N+(N-1)*NB)
1435 * (RWorkspace: 0)
1436 *
1437  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1438  $ work( iwork ), lwork-iwork+1, ierr )
1439  irwork = ie + n
1440 *
1441 * Perform bidiagonal QR iteration, computing left
1442 * singular vectors of R in WORK(IU) and computing
1443 * right singular vectors of R in VT
1444 * (CWorkspace: need N*N)
1445 * (RWorkspace: need BDSPAC)
1446 *
1447  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1448  $ ldvt, work( iu ), ldwrku, cdum, 1,
1449  $ rwork( irwork ), info )
1450 *
1451 * Multiply Q in A by left singular vectors of R in
1452 * WORK(IU), storing result in U
1453 * (CWorkspace: need N*N)
1454 * (RWorkspace: 0)
1455 *
1456  CALL zgemm( 'N', 'N', m, n, n, cone, a, lda,
1457  $ work( iu ), ldwrku, czero, u, ldu )
1458 *
1459  ELSE
1460 *
1461 * Insufficient workspace for a fast algorithm
1462 *
1463  itau = 1
1464  iwork = itau + n
1465 *
1466 * Compute A=Q*R, copying result to U
1467 * (CWorkspace: need 2*N, prefer N+N*NB)
1468 * (RWorkspace: 0)
1469 *
1470  CALL zgeqrf( m, n, a, lda, work( itau ),
1471  $ work( iwork ), lwork-iwork+1, ierr )
1472  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1473 *
1474 * Generate Q in U
1475 * (CWorkspace: need 2*N, prefer N+N*NB)
1476 * (RWorkspace: 0)
1477 *
1478  CALL zungqr( m, n, n, u, ldu, work( itau ),
1479  $ work( iwork ), lwork-iwork+1, ierr )
1480 *
1481 * Copy R to VT, zeroing out below it
1482 *
1483  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1484  IF( n.GT.1 )
1485  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
1486  $ vt( 2, 1 ), ldvt )
1487  ie = 1
1488  itauq = itau
1489  itaup = itauq + n
1490  iwork = itaup + n
1491 *
1492 * Bidiagonalize R in VT
1493 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1494 * (RWorkspace: need N)
1495 *
1496  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
1497  $ work( itauq ), work( itaup ),
1498  $ work( iwork ), lwork-iwork+1, ierr )
1499 *
1500 * Multiply Q in U by left bidiagonalizing vectors
1501 * in VT
1502 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1503 * (RWorkspace: 0)
1504 *
1505  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
1506  $ work( itauq ), u, ldu, work( iwork ),
1507  $ lwork-iwork+1, ierr )
1508 *
1509 * Generate right bidiagonalizing vectors in VT
1510 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1511 * (RWorkspace: 0)
1512 *
1513  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1514  $ work( iwork ), lwork-iwork+1, ierr )
1515  irwork = ie + n
1516 *
1517 * Perform bidiagonal QR iteration, computing left
1518 * singular vectors of A in U and computing right
1519 * singular vectors of A in VT
1520 * (CWorkspace: 0)
1521 * (RWorkspace: need BDSPAC)
1522 *
1523  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
1524  $ ldvt, u, ldu, cdum, 1,
1525  $ rwork( irwork ), info )
1526 *
1527  END IF
1528 *
1529  END IF
1530 *
1531  ELSE IF( wntua ) THEN
1532 *
1533  IF( wntvn ) THEN
1534 *
1535 * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1536 * M left singular vectors to be computed in U and
1537 * no right singular vectors to be computed
1538 *
1539  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1540 *
1541 * Sufficient workspace for a fast algorithm
1542 *
1543  ir = 1
1544  IF( lwork.GE.wrkbl+lda*n ) THEN
1545 *
1546 * WORK(IR) is LDA by N
1547 *
1548  ldwrkr = lda
1549  ELSE
1550 *
1551 * WORK(IR) is N by N
1552 *
1553  ldwrkr = n
1554  END IF
1555  itau = ir + ldwrkr*n
1556  iwork = itau + n
1557 *
1558 * Compute A=Q*R, copying result to U
1559 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1560 * (RWorkspace: 0)
1561 *
1562  CALL zgeqrf( m, n, a, lda, work( itau ),
1563  $ work( iwork ), lwork-iwork+1, ierr )
1564  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1565 *
1566 * Copy R to WORK(IR), zeroing out below it
1567 *
1568  CALL zlacpy( 'U', n, n, a, lda, work( ir ),
1569  $ ldwrkr )
1570  CALL zlaset( 'L', n-1, n-1, czero, czero,
1571  $ work( ir+1 ), ldwrkr )
1572 *
1573 * Generate Q in U
1574 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1575 * (RWorkspace: 0)
1576 *
1577  CALL zungqr( m, m, n, u, ldu, work( itau ),
1578  $ work( iwork ), lwork-iwork+1, ierr )
1579  ie = 1
1580  itauq = itau
1581  itaup = itauq + n
1582  iwork = itaup + n
1583 *
1584 * Bidiagonalize R in WORK(IR)
1585 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1586 * (RWorkspace: need N)
1587 *
1588  CALL zgebrd( n, n, work( ir ), ldwrkr, s,
1589  $ rwork( ie ), work( itauq ),
1590  $ work( itaup ), work( iwork ),
1591  $ lwork-iwork+1, ierr )
1592 *
1593 * Generate left bidiagonalizing vectors in WORK(IR)
1594 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1595 * (RWorkspace: 0)
1596 *
1597  CALL zungbr( 'Q', n, n, n, work( ir ), ldwrkr,
1598  $ work( itauq ), work( iwork ),
1599  $ lwork-iwork+1, ierr )
1600  irwork = ie + n
1601 *
1602 * Perform bidiagonal QR iteration, computing left
1603 * singular vectors of R in WORK(IR)
1604 * (CWorkspace: need N*N)
1605 * (RWorkspace: need BDSPAC)
1606 *
1607  CALL zbdsqr( 'U', n, 0, n, 0, s, rwork( ie ), cdum,
1608  $ 1, work( ir ), ldwrkr, cdum, 1,
1609  $ rwork( irwork ), info )
1610 *
1611 * Multiply Q in U by left singular vectors of R in
1612 * WORK(IR), storing result in A
1613 * (CWorkspace: need N*N)
1614 * (RWorkspace: 0)
1615 *
1616  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1617  $ work( ir ), ldwrkr, czero, a, lda )
1618 *
1619 * Copy left singular vectors of A from A to U
1620 *
1621  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1622 *
1623  ELSE
1624 *
1625 * Insufficient workspace for a fast algorithm
1626 *
1627  itau = 1
1628  iwork = itau + n
1629 *
1630 * Compute A=Q*R, copying result to U
1631 * (CWorkspace: need 2*N, prefer N+N*NB)
1632 * (RWorkspace: 0)
1633 *
1634  CALL zgeqrf( m, n, a, lda, work( itau ),
1635  $ work( iwork ), lwork-iwork+1, ierr )
1636  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1637 *
1638 * Generate Q in U
1639 * (CWorkspace: need N+M, prefer N+M*NB)
1640 * (RWorkspace: 0)
1641 *
1642  CALL zungqr( m, m, n, u, ldu, work( itau ),
1643  $ work( iwork ), lwork-iwork+1, ierr )
1644  ie = 1
1645  itauq = itau
1646  itaup = itauq + n
1647  iwork = itaup + n
1648 *
1649 * Zero out below R in A
1650 *
1651  CALL zlaset( 'L', n-1, n-1, czero, czero,
1652  $ a( 2, 1 ), lda )
1653 *
1654 * Bidiagonalize R in A
1655 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1656 * (RWorkspace: need N)
1657 *
1658  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1659  $ work( itauq ), work( itaup ),
1660  $ work( iwork ), lwork-iwork+1, ierr )
1661 *
1662 * Multiply Q in U by left bidiagonalizing vectors
1663 * in A
1664 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1665 * (RWorkspace: 0)
1666 *
1667  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1668  $ work( itauq ), u, ldu, work( iwork ),
1669  $ lwork-iwork+1, ierr )
1670  irwork = ie + n
1671 *
1672 * Perform bidiagonal QR iteration, computing left
1673 * singular vectors of A in U
1674 * (CWorkspace: 0)
1675 * (RWorkspace: need BDSPAC)
1676 *
1677  CALL zbdsqr( 'U', n, 0, m, 0, s, rwork( ie ), cdum,
1678  $ 1, u, ldu, cdum, 1, rwork( irwork ),
1679  $ info )
1680 *
1681  END IF
1682 *
1683  ELSE IF( wntvo ) THEN
1684 *
1685 * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1686 * M left singular vectors to be computed in U and
1687 * N right singular vectors to be overwritten on A
1688 *
1689  IF( lwork.GE.2*n*n+max( n+m, 3*n ) ) THEN
1690 *
1691 * Sufficient workspace for a fast algorithm
1692 *
1693  iu = 1
1694  IF( lwork.GE.wrkbl+2*lda*n ) THEN
1695 *
1696 * WORK(IU) is LDA by N and WORK(IR) is LDA by N
1697 *
1698  ldwrku = lda
1699  ir = iu + ldwrku*n
1700  ldwrkr = lda
1701  ELSE IF( lwork.GE.wrkbl+( lda+n )*n ) THEN
1702 *
1703 * WORK(IU) is LDA by N and WORK(IR) is N by N
1704 *
1705  ldwrku = lda
1706  ir = iu + ldwrku*n
1707  ldwrkr = n
1708  ELSE
1709 *
1710 * WORK(IU) is N by N and WORK(IR) is N by N
1711 *
1712  ldwrku = n
1713  ir = iu + ldwrku*n
1714  ldwrkr = n
1715  END IF
1716  itau = ir + ldwrkr*n
1717  iwork = itau + n
1718 *
1719 * Compute A=Q*R, copying result to U
1720 * (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1721 * (RWorkspace: 0)
1722 *
1723  CALL zgeqrf( m, n, a, lda, work( itau ),
1724  $ work( iwork ), lwork-iwork+1, ierr )
1725  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1726 *
1727 * Generate Q in U
1728 * (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1729 * (RWorkspace: 0)
1730 *
1731  CALL zungqr( m, m, n, u, ldu, work( itau ),
1732  $ work( iwork ), lwork-iwork+1, ierr )
1733 *
1734 * Copy R to WORK(IU), zeroing out below it
1735 *
1736  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1737  $ ldwrku )
1738  CALL zlaset( 'L', n-1, n-1, czero, czero,
1739  $ work( iu+1 ), ldwrku )
1740  ie = 1
1741  itauq = itau
1742  itaup = itauq + n
1743  iwork = itaup + n
1744 *
1745 * Bidiagonalize R in WORK(IU), copying result to
1746 * WORK(IR)
1747 * (CWorkspace: need 2*N*N+3*N,
1748 * prefer 2*N*N+2*N+2*N*NB)
1749 * (RWorkspace: need N)
1750 *
1751  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1752  $ rwork( ie ), work( itauq ),
1753  $ work( itaup ), work( iwork ),
1754  $ lwork-iwork+1, ierr )
1755  CALL zlacpy( 'U', n, n, work( iu ), ldwrku,
1756  $ work( ir ), ldwrkr )
1757 *
1758 * Generate left bidiagonalizing vectors in WORK(IU)
1759 * (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1760 * (RWorkspace: 0)
1761 *
1762  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1763  $ work( itauq ), work( iwork ),
1764  $ lwork-iwork+1, ierr )
1765 *
1766 * Generate right bidiagonalizing vectors in WORK(IR)
1767 * (CWorkspace: need 2*N*N+3*N-1,
1768 * prefer 2*N*N+2*N+(N-1)*NB)
1769 * (RWorkspace: 0)
1770 *
1771  CALL zungbr( 'P', n, n, n, work( ir ), ldwrkr,
1772  $ work( itaup ), work( iwork ),
1773  $ lwork-iwork+1, ierr )
1774  irwork = ie + n
1775 *
1776 * Perform bidiagonal QR iteration, computing left
1777 * singular vectors of R in WORK(IU) and computing
1778 * right singular vectors of R in WORK(IR)
1779 * (CWorkspace: need 2*N*N)
1780 * (RWorkspace: need BDSPAC)
1781 *
1782  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ),
1783  $ work( ir ), ldwrkr, work( iu ),
1784  $ ldwrku, cdum, 1, rwork( irwork ),
1785  $ info )
1786 *
1787 * Multiply Q in U by left singular vectors of R in
1788 * WORK(IU), storing result in A
1789 * (CWorkspace: need N*N)
1790 * (RWorkspace: 0)
1791 *
1792  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1793  $ work( iu ), ldwrku, czero, a, lda )
1794 *
1795 * Copy left singular vectors of A from A to U
1796 *
1797  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1798 *
1799 * Copy right singular vectors of R from WORK(IR) to A
1800 *
1801  CALL zlacpy( 'F', n, n, work( ir ), ldwrkr, a,
1802  $ lda )
1803 *
1804  ELSE
1805 *
1806 * Insufficient workspace for a fast algorithm
1807 *
1808  itau = 1
1809  iwork = itau + n
1810 *
1811 * Compute A=Q*R, copying result to U
1812 * (CWorkspace: need 2*N, prefer N+N*NB)
1813 * (RWorkspace: 0)
1814 *
1815  CALL zgeqrf( m, n, a, lda, work( itau ),
1816  $ work( iwork ), lwork-iwork+1, ierr )
1817  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1818 *
1819 * Generate Q in U
1820 * (CWorkspace: need N+M, prefer N+M*NB)
1821 * (RWorkspace: 0)
1822 *
1823  CALL zungqr( m, m, n, u, ldu, work( itau ),
1824  $ work( iwork ), lwork-iwork+1, ierr )
1825  ie = 1
1826  itauq = itau
1827  itaup = itauq + n
1828  iwork = itaup + n
1829 *
1830 * Zero out below R in A
1831 *
1832  CALL zlaset( 'L', n-1, n-1, czero, czero,
1833  $ a( 2, 1 ), lda )
1834 *
1835 * Bidiagonalize R in A
1836 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1837 * (RWorkspace: need N)
1838 *
1839  CALL zgebrd( n, n, a, lda, s, rwork( ie ),
1840  $ work( itauq ), work( itaup ),
1841  $ work( iwork ), lwork-iwork+1, ierr )
1842 *
1843 * Multiply Q in U by left bidiagonalizing vectors
1844 * in A
1845 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1846 * (RWorkspace: 0)
1847 *
1848  CALL zunmbr( 'Q', 'R', 'N', m, n, n, a, lda,
1849  $ work( itauq ), u, ldu, work( iwork ),
1850  $ lwork-iwork+1, ierr )
1851 *
1852 * Generate right bidiagonalizing vectors in A
1853 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1854 * (RWorkspace: 0)
1855 *
1856  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
1857  $ work( iwork ), lwork-iwork+1, ierr )
1858  irwork = ie + n
1859 *
1860 * Perform bidiagonal QR iteration, computing left
1861 * singular vectors of A in U and computing right
1862 * singular vectors of A in A
1863 * (CWorkspace: 0)
1864 * (RWorkspace: need BDSPAC)
1865 *
1866  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), a,
1867  $ lda, u, ldu, cdum, 1, rwork( irwork ),
1868  $ info )
1869 *
1870  END IF
1871 *
1872  ELSE IF( wntvas ) THEN
1873 *
1874 * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1875 * or 'A')
1876 * M left singular vectors to be computed in U and
1877 * N right singular vectors to be computed in VT
1878 *
1879  IF( lwork.GE.n*n+max( n+m, 3*n ) ) THEN
1880 *
1881 * Sufficient workspace for a fast algorithm
1882 *
1883  iu = 1
1884  IF( lwork.GE.wrkbl+lda*n ) THEN
1885 *
1886 * WORK(IU) is LDA by N
1887 *
1888  ldwrku = lda
1889  ELSE
1890 *
1891 * WORK(IU) is N by N
1892 *
1893  ldwrku = n
1894  END IF
1895  itau = iu + ldwrku*n
1896  iwork = itau + n
1897 *
1898 * Compute A=Q*R, copying result to U
1899 * (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1900 * (RWorkspace: 0)
1901 *
1902  CALL zgeqrf( m, n, a, lda, work( itau ),
1903  $ work( iwork ), lwork-iwork+1, ierr )
1904  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1905 *
1906 * Generate Q in U
1907 * (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1908 * (RWorkspace: 0)
1909 *
1910  CALL zungqr( m, m, n, u, ldu, work( itau ),
1911  $ work( iwork ), lwork-iwork+1, ierr )
1912 *
1913 * Copy R to WORK(IU), zeroing out below it
1914 *
1915  CALL zlacpy( 'U', n, n, a, lda, work( iu ),
1916  $ ldwrku )
1917  CALL zlaset( 'L', n-1, n-1, czero, czero,
1918  $ work( iu+1 ), ldwrku )
1919  ie = 1
1920  itauq = itau
1921  itaup = itauq + n
1922  iwork = itaup + n
1923 *
1924 * Bidiagonalize R in WORK(IU), copying result to VT
1925 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1926 * (RWorkspace: need N)
1927 *
1928  CALL zgebrd( n, n, work( iu ), ldwrku, s,
1929  $ rwork( ie ), work( itauq ),
1930  $ work( itaup ), work( iwork ),
1931  $ lwork-iwork+1, ierr )
1932  CALL zlacpy( 'U', n, n, work( iu ), ldwrku, vt,
1933  $ ldvt )
1934 *
1935 * Generate left bidiagonalizing vectors in WORK(IU)
1936 * (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1937 * (RWorkspace: 0)
1938 *
1939  CALL zungbr( 'Q', n, n, n, work( iu ), ldwrku,
1940  $ work( itauq ), work( iwork ),
1941  $ lwork-iwork+1, ierr )
1942 *
1943 * Generate right bidiagonalizing vectors in VT
1944 * (CWorkspace: need N*N+3*N-1,
1945 * prefer N*N+2*N+(N-1)*NB)
1946 * (RWorkspace: need 0)
1947 *
1948  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
1949  $ work( iwork ), lwork-iwork+1, ierr )
1950  irwork = ie + n
1951 *
1952 * Perform bidiagonal QR iteration, computing left
1953 * singular vectors of R in WORK(IU) and computing
1954 * right singular vectors of R in VT
1955 * (CWorkspace: need N*N)
1956 * (RWorkspace: need BDSPAC)
1957 *
1958  CALL zbdsqr( 'U', n, n, n, 0, s, rwork( ie ), vt,
1959  $ ldvt, work( iu ), ldwrku, cdum, 1,
1960  $ rwork( irwork ), info )
1961 *
1962 * Multiply Q in U by left singular vectors of R in
1963 * WORK(IU), storing result in A
1964 * (CWorkspace: need N*N)
1965 * (RWorkspace: 0)
1966 *
1967  CALL zgemm( 'N', 'N', m, n, n, cone, u, ldu,
1968  $ work( iu ), ldwrku, czero, a, lda )
1969 *
1970 * Copy left singular vectors of A from A to U
1971 *
1972  CALL zlacpy( 'F', m, n, a, lda, u, ldu )
1973 *
1974  ELSE
1975 *
1976 * Insufficient workspace for a fast algorithm
1977 *
1978  itau = 1
1979  iwork = itau + n
1980 *
1981 * Compute A=Q*R, copying result to U
1982 * (CWorkspace: need 2*N, prefer N+N*NB)
1983 * (RWorkspace: 0)
1984 *
1985  CALL zgeqrf( m, n, a, lda, work( itau ),
1986  $ work( iwork ), lwork-iwork+1, ierr )
1987  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
1988 *
1989 * Generate Q in U
1990 * (CWorkspace: need N+M, prefer N+M*NB)
1991 * (RWorkspace: 0)
1992 *
1993  CALL zungqr( m, m, n, u, ldu, work( itau ),
1994  $ work( iwork ), lwork-iwork+1, ierr )
1995 *
1996 * Copy R from A to VT, zeroing out below it
1997 *
1998  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
1999  IF( n.GT.1 )
2000  $ CALL zlaset( 'L', n-1, n-1, czero, czero,
2001  $ vt( 2, 1 ), ldvt )
2002  ie = 1
2003  itauq = itau
2004  itaup = itauq + n
2005  iwork = itaup + n
2006 *
2007 * Bidiagonalize R in VT
2008 * (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
2009 * (RWorkspace: need N)
2010 *
2011  CALL zgebrd( n, n, vt, ldvt, s, rwork( ie ),
2012  $ work( itauq ), work( itaup ),
2013  $ work( iwork ), lwork-iwork+1, ierr )
2014 *
2015 * Multiply Q in U by left bidiagonalizing vectors
2016 * in VT
2017 * (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
2018 * (RWorkspace: 0)
2019 *
2020  CALL zunmbr( 'Q', 'R', 'N', m, n, n, vt, ldvt,
2021  $ work( itauq ), u, ldu, work( iwork ),
2022  $ lwork-iwork+1, ierr )
2023 *
2024 * Generate right bidiagonalizing vectors in VT
2025 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2026 * (RWorkspace: 0)
2027 *
2028  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2029  $ work( iwork ), lwork-iwork+1, ierr )
2030  irwork = ie + n
2031 *
2032 * Perform bidiagonal QR iteration, computing left
2033 * singular vectors of A in U and computing right
2034 * singular vectors of A in VT
2035 * (CWorkspace: 0)
2036 * (RWorkspace: need BDSPAC)
2037 *
2038  CALL zbdsqr( 'U', n, n, m, 0, s, rwork( ie ), vt,
2039  $ ldvt, u, ldu, cdum, 1,
2040  $ rwork( irwork ), info )
2041 *
2042  END IF
2043 *
2044  END IF
2045 *
2046  END IF
2047 *
2048  ELSE
2049 *
2050 * M .LT. MNTHR
2051 *
2052 * Path 10 (M at least N, but not much larger)
2053 * Reduce to bidiagonal form without QR decomposition
2054 *
2055  ie = 1
2056  itauq = 1
2057  itaup = itauq + n
2058  iwork = itaup + n
2059 *
2060 * Bidiagonalize A
2061 * (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
2062 * (RWorkspace: need N)
2063 *
2064  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
2065  $ work( itaup ), work( iwork ), lwork-iwork+1,
2066  $ ierr )
2067  IF( wntuas ) THEN
2068 *
2069 * If left singular vectors desired in U, copy result to U
2070 * and generate left bidiagonalizing vectors in U
2071 * (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
2072 * (RWorkspace: 0)
2073 *
2074  CALL zlacpy( 'L', m, n, a, lda, u, ldu )
2075  IF( wntus )
2076  $ ncu = n
2077  IF( wntua )
2078  $ ncu = m
2079  CALL zungbr( 'Q', m, ncu, n, u, ldu, work( itauq ),
2080  $ work( iwork ), lwork-iwork+1, ierr )
2081  END IF
2082  IF( wntvas ) THEN
2083 *
2084 * If right singular vectors desired in VT, copy result to
2085 * VT and generate right bidiagonalizing vectors in VT
2086 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2087 * (RWorkspace: 0)
2088 *
2089  CALL zlacpy( 'U', n, n, a, lda, vt, ldvt )
2090  CALL zungbr( 'P', n, n, n, vt, ldvt, work( itaup ),
2091  $ work( iwork ), lwork-iwork+1, ierr )
2092  END IF
2093  IF( wntuo ) THEN
2094 *
2095 * If left singular vectors desired in A, generate left
2096 * bidiagonalizing vectors in A
2097 * (CWorkspace: need 3*N, prefer 2*N+N*NB)
2098 * (RWorkspace: 0)
2099 *
2100  CALL zungbr( 'Q', m, n, n, a, lda, work( itauq ),
2101  $ work( iwork ), lwork-iwork+1, ierr )
2102  END IF
2103  IF( wntvo ) THEN
2104 *
2105 * If right singular vectors desired in A, generate right
2106 * bidiagonalizing vectors in A
2107 * (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2108 * (RWorkspace: 0)
2109 *
2110  CALL zungbr( 'P', n, n, n, a, lda, work( itaup ),
2111  $ work( iwork ), lwork-iwork+1, ierr )
2112  END IF
2113  irwork = ie + n
2114  IF( wntuas .OR. wntuo )
2115  $ nru = m
2116  IF( wntun )
2117  $ nru = 0
2118  IF( wntvas .OR. wntvo )
2119  $ ncvt = n
2120  IF( wntvn )
2121  $ ncvt = 0
2122  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
2123 *
2124 * Perform bidiagonal QR iteration, if desired, computing
2125 * left singular vectors in U and computing right singular
2126 * vectors in VT
2127 * (CWorkspace: 0)
2128 * (RWorkspace: need BDSPAC)
2129 *
2130  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2131  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
2132  $ info )
2133  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
2134 *
2135 * Perform bidiagonal QR iteration, if desired, computing
2136 * left singular vectors in U and computing right singular
2137 * vectors in A
2138 * (CWorkspace: 0)
2139 * (RWorkspace: need BDSPAC)
2140 *
2141  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), a,
2142  $ lda, u, ldu, cdum, 1, rwork( irwork ),
2143  $ info )
2144  ELSE
2145 *
2146 * Perform bidiagonal QR iteration, if desired, computing
2147 * left singular vectors in A and computing right singular
2148 * vectors in VT
2149 * (CWorkspace: 0)
2150 * (RWorkspace: need BDSPAC)
2151 *
2152  CALL zbdsqr( 'U', n, ncvt, nru, 0, s, rwork( ie ), vt,
2153  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
2154  $ info )
2155  END IF
2156 *
2157  END IF
2158 *
2159  ELSE
2160 *
2161 * A has more columns than rows. If A has sufficiently more
2162 * columns than rows, first reduce using the LQ decomposition (if
2163 * sufficient workspace available)
2164 *
2165  IF( n.GE.mnthr ) THEN
2166 *
2167  IF( wntvn ) THEN
2168 *
2169 * Path 1t(N much larger than M, JOBVT='N')
2170 * No right singular vectors to be computed
2171 *
2172  itau = 1
2173  iwork = itau + m
2174 *
2175 * Compute A=L*Q
2176 * (CWorkspace: need 2*M, prefer M+M*NB)
2177 * (RWorkspace: 0)
2178 *
2179  CALL zgelqf( m, n, a, lda, work( itau ), work( iwork ),
2180  $ lwork-iwork+1, ierr )
2181 *
2182 * Zero out above L
2183 *
2184  CALL zlaset( 'U', m-1, m-1, czero, czero, a( 1, 2 ),
2185  $ lda )
2186  ie = 1
2187  itauq = 1
2188  itaup = itauq + m
2189  iwork = itaup + m
2190 *
2191 * Bidiagonalize L in A
2192 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2193 * (RWorkspace: need M)
2194 *
2195  CALL zgebrd( m, m, a, lda, s, rwork( ie ), work( itauq ),
2196  $ work( itaup ), work( iwork ), lwork-iwork+1,
2197  $ ierr )
2198  IF( wntuo .OR. wntuas ) THEN
2199 *
2200 * If left singular vectors desired, generate Q
2201 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2202 * (RWorkspace: 0)
2203 *
2204  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2205  $ work( iwork ), lwork-iwork+1, ierr )
2206  END IF
2207  irwork = ie + m
2208  nru = 0
2209  IF( wntuo .OR. wntuas )
2210  $ nru = m
2211 *
2212 * Perform bidiagonal QR iteration, computing left singular
2213 * vectors of A in A if desired
2214 * (CWorkspace: 0)
2215 * (RWorkspace: need BDSPAC)
2216 *
2217  CALL zbdsqr( 'U', m, 0, nru, 0, s, rwork( ie ), cdum, 1,
2218  $ a, lda, cdum, 1, rwork( irwork ), info )
2219 *
2220 * If left singular vectors desired in U, copy them there
2221 *
2222  IF( wntuas )
2223  $ CALL zlacpy( 'F', m, m, a, lda, u, ldu )
2224 *
2225  ELSE IF( wntvo .AND. wntun ) THEN
2226 *
2227 * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2228 * M right singular vectors to be overwritten on A and
2229 * no left singular vectors to be computed
2230 *
2231  IF( lwork.GE.m*m+3*m ) THEN
2232 *
2233 * Sufficient workspace for a fast algorithm
2234 *
2235  ir = 1
2236  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2237 *
2238 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2239 *
2240  ldwrku = lda
2241  chunk = n
2242  ldwrkr = lda
2243  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2244 *
2245 * WORK(IU) is LDA by N and WORK(IR) is M by M
2246 *
2247  ldwrku = lda
2248  chunk = n
2249  ldwrkr = m
2250  ELSE
2251 *
2252 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2253 *
2254  ldwrku = m
2255  chunk = ( lwork-m*m ) / m
2256  ldwrkr = m
2257  END IF
2258  itau = ir + ldwrkr*m
2259  iwork = itau + m
2260 *
2261 * Compute A=L*Q
2262 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2263 * (RWorkspace: 0)
2264 *
2265  CALL zgelqf( m, n, a, lda, work( itau ),
2266  $ work( iwork ), lwork-iwork+1, ierr )
2267 *
2268 * Copy L to WORK(IR) and zero out above it
2269 *
2270  CALL zlacpy( 'L', m, m, a, lda, work( ir ), ldwrkr )
2271  CALL zlaset( 'U', m-1, m-1, czero, czero,
2272  $ work( ir+ldwrkr ), ldwrkr )
2273 *
2274 * Generate Q in A
2275 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2276 * (RWorkspace: 0)
2277 *
2278  CALL zunglq( m, n, m, a, lda, work( itau ),
2279  $ work( iwork ), lwork-iwork+1, ierr )
2280  ie = 1
2281  itauq = itau
2282  itaup = itauq + m
2283  iwork = itaup + m
2284 *
2285 * Bidiagonalize L in WORK(IR)
2286 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2287 * (RWorkspace: need M)
2288 *
2289  CALL zgebrd( m, m, work( ir ), ldwrkr, s, rwork( ie ),
2290  $ work( itauq ), work( itaup ),
2291  $ work( iwork ), lwork-iwork+1, ierr )
2292 *
2293 * Generate right vectors bidiagonalizing L
2294 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2295 * (RWorkspace: 0)
2296 *
2297  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2298  $ work( itaup ), work( iwork ),
2299  $ lwork-iwork+1, ierr )
2300  irwork = ie + m
2301 *
2302 * Perform bidiagonal QR iteration, computing right
2303 * singular vectors of L in WORK(IR)
2304 * (CWorkspace: need M*M)
2305 * (RWorkspace: need BDSPAC)
2306 *
2307  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2308  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2309  $ rwork( irwork ), info )
2310  iu = itauq
2311 *
2312 * Multiply right singular vectors of L in WORK(IR) by Q
2313 * in A, storing result in WORK(IU) and copying to A
2314 * (CWorkspace: need M*M+M, prefer M*M+M*N)
2315 * (RWorkspace: 0)
2316 *
2317  DO 30 i = 1, n, chunk
2318  blk = min( n-i+1, chunk )
2319  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2320  $ ldwrkr, a( 1, i ), lda, czero,
2321  $ work( iu ), ldwrku )
2322  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2323  $ a( 1, i ), lda )
2324  30 continue
2325 *
2326  ELSE
2327 *
2328 * Insufficient workspace for a fast algorithm
2329 *
2330  ie = 1
2331  itauq = 1
2332  itaup = itauq + m
2333  iwork = itaup + m
2334 *
2335 * Bidiagonalize A
2336 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2337 * (RWorkspace: need M)
2338 *
2339  CALL zgebrd( m, n, a, lda, s, rwork( ie ),
2340  $ work( itauq ), work( itaup ),
2341  $ work( iwork ), lwork-iwork+1, ierr )
2342 *
2343 * Generate right vectors bidiagonalizing A
2344 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2345 * (RWorkspace: 0)
2346 *
2347  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
2348  $ work( iwork ), lwork-iwork+1, ierr )
2349  irwork = ie + m
2350 *
2351 * Perform bidiagonal QR iteration, computing right
2352 * singular vectors of A in A
2353 * (CWorkspace: 0)
2354 * (RWorkspace: need BDSPAC)
2355 *
2356  CALL zbdsqr( 'L', m, n, 0, 0, s, rwork( ie ), a, lda,
2357  $ cdum, 1, cdum, 1, rwork( irwork ), info )
2358 *
2359  END IF
2360 *
2361  ELSE IF( wntvo .AND. wntuas ) THEN
2362 *
2363 * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2364 * M right singular vectors to be overwritten on A and
2365 * M left singular vectors to be computed in U
2366 *
2367  IF( lwork.GE.m*m+3*m ) THEN
2368 *
2369 * Sufficient workspace for a fast algorithm
2370 *
2371  ir = 1
2372  IF( lwork.GE.max( wrkbl, lda*n )+lda*m ) THEN
2373 *
2374 * WORK(IU) is LDA by N and WORK(IR) is LDA by M
2375 *
2376  ldwrku = lda
2377  chunk = n
2378  ldwrkr = lda
2379  ELSE IF( lwork.GE.max( wrkbl, lda*n )+m*m ) THEN
2380 *
2381 * WORK(IU) is LDA by N and WORK(IR) is M by M
2382 *
2383  ldwrku = lda
2384  chunk = n
2385  ldwrkr = m
2386  ELSE
2387 *
2388 * WORK(IU) is M by CHUNK and WORK(IR) is M by M
2389 *
2390  ldwrku = m
2391  chunk = ( lwork-m*m ) / m
2392  ldwrkr = m
2393  END IF
2394  itau = ir + ldwrkr*m
2395  iwork = itau + m
2396 *
2397 * Compute A=L*Q
2398 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2399 * (RWorkspace: 0)
2400 *
2401  CALL zgelqf( m, n, a, lda, work( itau ),
2402  $ work( iwork ), lwork-iwork+1, ierr )
2403 *
2404 * Copy L to U, zeroing about above it
2405 *
2406  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2407  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2408  $ ldu )
2409 *
2410 * Generate Q in A
2411 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2412 * (RWorkspace: 0)
2413 *
2414  CALL zunglq( m, n, m, a, lda, work( itau ),
2415  $ work( iwork ), lwork-iwork+1, ierr )
2416  ie = 1
2417  itauq = itau
2418  itaup = itauq + m
2419  iwork = itaup + m
2420 *
2421 * Bidiagonalize L in U, copying result to WORK(IR)
2422 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2423 * (RWorkspace: need M)
2424 *
2425  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2426  $ work( itauq ), work( itaup ),
2427  $ work( iwork ), lwork-iwork+1, ierr )
2428  CALL zlacpy( 'U', m, m, u, ldu, work( ir ), ldwrkr )
2429 *
2430 * Generate right vectors bidiagonalizing L in WORK(IR)
2431 * (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2432 * (RWorkspace: 0)
2433 *
2434  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2435  $ work( itaup ), work( iwork ),
2436  $ lwork-iwork+1, ierr )
2437 *
2438 * Generate left vectors bidiagonalizing L in U
2439 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2440 * (RWorkspace: 0)
2441 *
2442  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2443  $ work( iwork ), lwork-iwork+1, ierr )
2444  irwork = ie + m
2445 *
2446 * Perform bidiagonal QR iteration, computing left
2447 * singular vectors of L in U, and computing right
2448 * singular vectors of L in WORK(IR)
2449 * (CWorkspace: need M*M)
2450 * (RWorkspace: need BDSPAC)
2451 *
2452  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2453  $ work( ir ), ldwrkr, u, ldu, cdum, 1,
2454  $ rwork( irwork ), info )
2455  iu = itauq
2456 *
2457 * Multiply right singular vectors of L in WORK(IR) by Q
2458 * in A, storing result in WORK(IU) and copying to A
2459 * (CWorkspace: need M*M+M, prefer M*M+M*N))
2460 * (RWorkspace: 0)
2461 *
2462  DO 40 i = 1, n, chunk
2463  blk = min( n-i+1, chunk )
2464  CALL zgemm( 'N', 'N', m, blk, m, cone, work( ir ),
2465  $ ldwrkr, a( 1, i ), lda, czero,
2466  $ work( iu ), ldwrku )
2467  CALL zlacpy( 'F', m, blk, work( iu ), ldwrku,
2468  $ a( 1, i ), lda )
2469  40 continue
2470 *
2471  ELSE
2472 *
2473 * Insufficient workspace for a fast algorithm
2474 *
2475  itau = 1
2476  iwork = itau + m
2477 *
2478 * Compute A=L*Q
2479 * (CWorkspace: need 2*M, prefer M+M*NB)
2480 * (RWorkspace: 0)
2481 *
2482  CALL zgelqf( m, n, a, lda, work( itau ),
2483  $ work( iwork ), lwork-iwork+1, ierr )
2484 *
2485 * Copy L to U, zeroing out above it
2486 *
2487  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2488  CALL zlaset( 'U', m-1, m-1, czero, czero, u( 1, 2 ),
2489  $ ldu )
2490 *
2491 * Generate Q in A
2492 * (CWorkspace: need 2*M, prefer M+M*NB)
2493 * (RWorkspace: 0)
2494 *
2495  CALL zunglq( m, n, m, a, lda, work( itau ),
2496  $ work( iwork ), lwork-iwork+1, ierr )
2497  ie = 1
2498  itauq = itau
2499  itaup = itauq + m
2500  iwork = itaup + m
2501 *
2502 * Bidiagonalize L in U
2503 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2504 * (RWorkspace: need M)
2505 *
2506  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
2507  $ work( itauq ), work( itaup ),
2508  $ work( iwork ), lwork-iwork+1, ierr )
2509 *
2510 * Multiply right vectors bidiagonalizing L by Q in A
2511 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2512 * (RWorkspace: 0)
2513 *
2514  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
2515  $ work( itaup ), a, lda, work( iwork ),
2516  $ lwork-iwork+1, ierr )
2517 *
2518 * Generate left vectors bidiagonalizing L in U
2519 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2520 * (RWorkspace: 0)
2521 *
2522  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2523  $ work( iwork ), lwork-iwork+1, ierr )
2524  irwork = ie + m
2525 *
2526 * Perform bidiagonal QR iteration, computing left
2527 * singular vectors of A in U and computing right
2528 * singular vectors of A in A
2529 * (CWorkspace: 0)
2530 * (RWorkspace: need BDSPAC)
2531 *
2532  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), a, lda,
2533  $ u, ldu, cdum, 1, rwork( irwork ), info )
2534 *
2535  END IF
2536 *
2537  ELSE IF( wntvs ) THEN
2538 *
2539  IF( wntun ) THEN
2540 *
2541 * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2542 * M right singular vectors to be computed in VT and
2543 * no left singular vectors to be computed
2544 *
2545  IF( lwork.GE.m*m+3*m ) THEN
2546 *
2547 * Sufficient workspace for a fast algorithm
2548 *
2549  ir = 1
2550  IF( lwork.GE.wrkbl+lda*m ) THEN
2551 *
2552 * WORK(IR) is LDA by M
2553 *
2554  ldwrkr = lda
2555  ELSE
2556 *
2557 * WORK(IR) is M by M
2558 *
2559  ldwrkr = m
2560  END IF
2561  itau = ir + ldwrkr*m
2562  iwork = itau + m
2563 *
2564 * Compute A=L*Q
2565 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2566 * (RWorkspace: 0)
2567 *
2568  CALL zgelqf( m, n, a, lda, work( itau ),
2569  $ work( iwork ), lwork-iwork+1, ierr )
2570 *
2571 * Copy L to WORK(IR), zeroing out above it
2572 *
2573  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
2574  $ ldwrkr )
2575  CALL zlaset( 'U', m-1, m-1, czero, czero,
2576  $ work( ir+ldwrkr ), ldwrkr )
2577 *
2578 * Generate Q in A
2579 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2580 * (RWorkspace: 0)
2581 *
2582  CALL zunglq( m, n, m, a, lda, work( itau ),
2583  $ work( iwork ), lwork-iwork+1, ierr )
2584  ie = 1
2585  itauq = itau
2586  itaup = itauq + m
2587  iwork = itaup + m
2588 *
2589 * Bidiagonalize L in WORK(IR)
2590 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2591 * (RWorkspace: need M)
2592 *
2593  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
2594  $ rwork( ie ), work( itauq ),
2595  $ work( itaup ), work( iwork ),
2596  $ lwork-iwork+1, ierr )
2597 *
2598 * Generate right vectors bidiagonalizing L in
2599 * WORK(IR)
2600 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2601 * (RWorkspace: 0)
2602 *
2603  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
2604  $ work( itaup ), work( iwork ),
2605  $ lwork-iwork+1, ierr )
2606  irwork = ie + m
2607 *
2608 * Perform bidiagonal QR iteration, computing right
2609 * singular vectors of L in WORK(IR)
2610 * (CWorkspace: need M*M)
2611 * (RWorkspace: need BDSPAC)
2612 *
2613  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
2614  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
2615  $ rwork( irwork ), info )
2616 *
2617 * Multiply right singular vectors of L in WORK(IR) by
2618 * Q in A, storing result in VT
2619 * (CWorkspace: need M*M)
2620 * (RWorkspace: 0)
2621 *
2622  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
2623  $ ldwrkr, a, lda, czero, vt, ldvt )
2624 *
2625  ELSE
2626 *
2627 * Insufficient workspace for a fast algorithm
2628 *
2629  itau = 1
2630  iwork = itau + m
2631 *
2632 * Compute A=L*Q
2633 * (CWorkspace: need 2*M, prefer M+M*NB)
2634 * (RWorkspace: 0)
2635 *
2636  CALL zgelqf( m, n, a, lda, work( itau ),
2637  $ work( iwork ), lwork-iwork+1, ierr )
2638 *
2639 * Copy result to VT
2640 *
2641  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2642 *
2643 * Generate Q in VT
2644 * (CWorkspace: need 2*M, prefer M+M*NB)
2645 * (RWorkspace: 0)
2646 *
2647  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2648  $ work( iwork ), lwork-iwork+1, ierr )
2649  ie = 1
2650  itauq = itau
2651  itaup = itauq + m
2652  iwork = itaup + m
2653 *
2654 * Zero out above L in A
2655 *
2656  CALL zlaset( 'U', m-1, m-1, czero, czero,
2657  $ a( 1, 2 ), lda )
2658 *
2659 * Bidiagonalize L in A
2660 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2661 * (RWorkspace: need M)
2662 *
2663  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2664  $ work( itauq ), work( itaup ),
2665  $ work( iwork ), lwork-iwork+1, ierr )
2666 *
2667 * Multiply right vectors bidiagonalizing L by Q in VT
2668 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2669 * (RWorkspace: 0)
2670 *
2671  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2672  $ work( itaup ), vt, ldvt,
2673  $ work( iwork ), lwork-iwork+1, ierr )
2674  irwork = ie + m
2675 *
2676 * Perform bidiagonal QR iteration, computing right
2677 * singular vectors of A in VT
2678 * (CWorkspace: 0)
2679 * (RWorkspace: need BDSPAC)
2680 *
2681  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
2682  $ ldvt, cdum, 1, cdum, 1,
2683  $ rwork( irwork ), info )
2684 *
2685  END IF
2686 *
2687  ELSE IF( wntuo ) THEN
2688 *
2689 * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2690 * M right singular vectors to be computed in VT and
2691 * M left singular vectors to be overwritten on A
2692 *
2693  IF( lwork.GE.2*m*m+3*m ) THEN
2694 *
2695 * Sufficient workspace for a fast algorithm
2696 *
2697  iu = 1
2698  IF( lwork.GE.wrkbl+2*lda*m ) THEN
2699 *
2700 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
2701 *
2702  ldwrku = lda
2703  ir = iu + ldwrku*m
2704  ldwrkr = lda
2705  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
2706 *
2707 * WORK(IU) is LDA by M and WORK(IR) is M by M
2708 *
2709  ldwrku = lda
2710  ir = iu + ldwrku*m
2711  ldwrkr = m
2712  ELSE
2713 *
2714 * WORK(IU) is M by M and WORK(IR) is M by M
2715 *
2716  ldwrku = m
2717  ir = iu + ldwrku*m
2718  ldwrkr = m
2719  END IF
2720  itau = ir + ldwrkr*m
2721  iwork = itau + m
2722 *
2723 * Compute A=L*Q
2724 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2725 * (RWorkspace: 0)
2726 *
2727  CALL zgelqf( m, n, a, lda, work( itau ),
2728  $ work( iwork ), lwork-iwork+1, ierr )
2729 *
2730 * Copy L to WORK(IU), zeroing out below it
2731 *
2732  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2733  $ ldwrku )
2734  CALL zlaset( 'U', m-1, m-1, czero, czero,
2735  $ work( iu+ldwrku ), ldwrku )
2736 *
2737 * Generate Q in A
2738 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2739 * (RWorkspace: 0)
2740 *
2741  CALL zunglq( m, n, m, a, lda, work( itau ),
2742  $ work( iwork ), lwork-iwork+1, ierr )
2743  ie = 1
2744  itauq = itau
2745  itaup = itauq + m
2746  iwork = itaup + m
2747 *
2748 * Bidiagonalize L in WORK(IU), copying result to
2749 * WORK(IR)
2750 * (CWorkspace: need 2*M*M+3*M,
2751 * prefer 2*M*M+2*M+2*M*NB)
2752 * (RWorkspace: need M)
2753 *
2754  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2755  $ rwork( ie ), work( itauq ),
2756  $ work( itaup ), work( iwork ),
2757  $ lwork-iwork+1, ierr )
2758  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
2759  $ work( ir ), ldwrkr )
2760 *
2761 * Generate right bidiagonalizing vectors in WORK(IU)
2762 * (CWorkspace: need 2*M*M+3*M-1,
2763 * prefer 2*M*M+2*M+(M-1)*NB)
2764 * (RWorkspace: 0)
2765 *
2766  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2767  $ work( itaup ), work( iwork ),
2768  $ lwork-iwork+1, ierr )
2769 *
2770 * Generate left bidiagonalizing vectors in WORK(IR)
2771 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2772 * (RWorkspace: 0)
2773 *
2774  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
2775  $ work( itauq ), work( iwork ),
2776  $ lwork-iwork+1, ierr )
2777  irwork = ie + m
2778 *
2779 * Perform bidiagonal QR iteration, computing left
2780 * singular vectors of L in WORK(IR) and computing
2781 * right singular vectors of L in WORK(IU)
2782 * (CWorkspace: need 2*M*M)
2783 * (RWorkspace: need BDSPAC)
2784 *
2785  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2786  $ work( iu ), ldwrku, work( ir ),
2787  $ ldwrkr, cdum, 1, rwork( irwork ),
2788  $ info )
2789 *
2790 * Multiply right singular vectors of L in WORK(IU) by
2791 * Q in A, storing result in VT
2792 * (CWorkspace: need M*M)
2793 * (RWorkspace: 0)
2794 *
2795  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2796  $ ldwrku, a, lda, czero, vt, ldvt )
2797 *
2798 * Copy left singular vectors of L to A
2799 * (CWorkspace: need M*M)
2800 * (RWorkspace: 0)
2801 *
2802  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
2803  $ lda )
2804 *
2805  ELSE
2806 *
2807 * Insufficient workspace for a fast algorithm
2808 *
2809  itau = 1
2810  iwork = itau + m
2811 *
2812 * Compute A=L*Q, copying result to VT
2813 * (CWorkspace: need 2*M, prefer M+M*NB)
2814 * (RWorkspace: 0)
2815 *
2816  CALL zgelqf( m, n, a, lda, work( itau ),
2817  $ work( iwork ), lwork-iwork+1, ierr )
2818  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2819 *
2820 * Generate Q in VT
2821 * (CWorkspace: need 2*M, prefer M+M*NB)
2822 * (RWorkspace: 0)
2823 *
2824  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2825  $ work( iwork ), lwork-iwork+1, ierr )
2826  ie = 1
2827  itauq = itau
2828  itaup = itauq + m
2829  iwork = itaup + m
2830 *
2831 * Zero out above L in A
2832 *
2833  CALL zlaset( 'U', m-1, m-1, czero, czero,
2834  $ a( 1, 2 ), lda )
2835 *
2836 * Bidiagonalize L in A
2837 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2838 * (RWorkspace: need M)
2839 *
2840  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
2841  $ work( itauq ), work( itaup ),
2842  $ work( iwork ), lwork-iwork+1, ierr )
2843 *
2844 * Multiply right vectors bidiagonalizing L by Q in VT
2845 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2846 * (RWorkspace: 0)
2847 *
2848  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
2849  $ work( itaup ), vt, ldvt,
2850  $ work( iwork ), lwork-iwork+1, ierr )
2851 *
2852 * Generate left bidiagonalizing vectors of L in A
2853 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
2854 * (RWorkspace: 0)
2855 *
2856  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
2857  $ work( iwork ), lwork-iwork+1, ierr )
2858  irwork = ie + m
2859 *
2860 * Perform bidiagonal QR iteration, computing left
2861 * singular vectors of A in A and computing right
2862 * singular vectors of A in VT
2863 * (CWorkspace: 0)
2864 * (RWorkspace: need BDSPAC)
2865 *
2866  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
2867  $ ldvt, a, lda, cdum, 1,
2868  $ rwork( irwork ), info )
2869 *
2870  END IF
2871 *
2872  ELSE IF( wntuas ) THEN
2873 *
2874 * Path 6t(N much larger than M, JOBU='S' or 'A',
2875 * JOBVT='S')
2876 * M right singular vectors to be computed in VT and
2877 * M left singular vectors to be computed in U
2878 *
2879  IF( lwork.GE.m*m+3*m ) THEN
2880 *
2881 * Sufficient workspace for a fast algorithm
2882 *
2883  iu = 1
2884  IF( lwork.GE.wrkbl+lda*m ) THEN
2885 *
2886 * WORK(IU) is LDA by N
2887 *
2888  ldwrku = lda
2889  ELSE
2890 *
2891 * WORK(IU) is LDA by M
2892 *
2893  ldwrku = m
2894  END IF
2895  itau = iu + ldwrku*m
2896  iwork = itau + m
2897 *
2898 * Compute A=L*Q
2899 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2900 * (RWorkspace: 0)
2901 *
2902  CALL zgelqf( m, n, a, lda, work( itau ),
2903  $ work( iwork ), lwork-iwork+1, ierr )
2904 *
2905 * Copy L to WORK(IU), zeroing out above it
2906 *
2907  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
2908  $ ldwrku )
2909  CALL zlaset( 'U', m-1, m-1, czero, czero,
2910  $ work( iu+ldwrku ), ldwrku )
2911 *
2912 * Generate Q in A
2913 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2914 * (RWorkspace: 0)
2915 *
2916  CALL zunglq( m, n, m, a, lda, work( itau ),
2917  $ work( iwork ), lwork-iwork+1, ierr )
2918  ie = 1
2919  itauq = itau
2920  itaup = itauq + m
2921  iwork = itaup + m
2922 *
2923 * Bidiagonalize L in WORK(IU), copying result to U
2924 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2925 * (RWorkspace: need M)
2926 *
2927  CALL zgebrd( m, m, work( iu ), ldwrku, s,
2928  $ rwork( ie ), work( itauq ),
2929  $ work( itaup ), work( iwork ),
2930  $ lwork-iwork+1, ierr )
2931  CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
2932  $ ldu )
2933 *
2934 * Generate right bidiagonalizing vectors in WORK(IU)
2935 * (CWorkspace: need M*M+3*M-1,
2936 * prefer M*M+2*M+(M-1)*NB)
2937 * (RWorkspace: 0)
2938 *
2939  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
2940  $ work( itaup ), work( iwork ),
2941  $ lwork-iwork+1, ierr )
2942 *
2943 * Generate left bidiagonalizing vectors in U
2944 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2945 * (RWorkspace: 0)
2946 *
2947  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
2948  $ work( iwork ), lwork-iwork+1, ierr )
2949  irwork = ie + m
2950 *
2951 * Perform bidiagonal QR iteration, computing left
2952 * singular vectors of L in U and computing right
2953 * singular vectors of L in WORK(IU)
2954 * (CWorkspace: need M*M)
2955 * (RWorkspace: need BDSPAC)
2956 *
2957  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
2958  $ work( iu ), ldwrku, u, ldu, cdum, 1,
2959  $ rwork( irwork ), info )
2960 *
2961 * Multiply right singular vectors of L in WORK(IU) by
2962 * Q in A, storing result in VT
2963 * (CWorkspace: need M*M)
2964 * (RWorkspace: 0)
2965 *
2966  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
2967  $ ldwrku, a, lda, czero, vt, ldvt )
2968 *
2969  ELSE
2970 *
2971 * Insufficient workspace for a fast algorithm
2972 *
2973  itau = 1
2974  iwork = itau + m
2975 *
2976 * Compute A=L*Q, copying result to VT
2977 * (CWorkspace: need 2*M, prefer M+M*NB)
2978 * (RWorkspace: 0)
2979 *
2980  CALL zgelqf( m, n, a, lda, work( itau ),
2981  $ work( iwork ), lwork-iwork+1, ierr )
2982  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
2983 *
2984 * Generate Q in VT
2985 * (CWorkspace: need 2*M, prefer M+M*NB)
2986 * (RWorkspace: 0)
2987 *
2988  CALL zunglq( m, n, m, vt, ldvt, work( itau ),
2989  $ work( iwork ), lwork-iwork+1, ierr )
2990 *
2991 * Copy L to U, zeroing out above it
2992 *
2993  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
2994  CALL zlaset( 'U', m-1, m-1, czero, czero,
2995  $ u( 1, 2 ), ldu )
2996  ie = 1
2997  itauq = itau
2998  itaup = itauq + m
2999  iwork = itaup + m
3000 *
3001 * Bidiagonalize L in U
3002 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3003 * (RWorkspace: need M)
3004 *
3005  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3006  $ work( itauq ), work( itaup ),
3007  $ work( iwork ), lwork-iwork+1, ierr )
3008 *
3009 * Multiply right bidiagonalizing vectors in U by Q
3010 * in VT
3011 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3012 * (RWorkspace: 0)
3013 *
3014  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3015  $ work( itaup ), vt, ldvt,
3016  $ work( iwork ), lwork-iwork+1, ierr )
3017 *
3018 * Generate left bidiagonalizing vectors in U
3019 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3020 * (RWorkspace: 0)
3021 *
3022  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3023  $ work( iwork ), lwork-iwork+1, ierr )
3024  irwork = ie + m
3025 *
3026 * Perform bidiagonal QR iteration, computing left
3027 * singular vectors of A in U and computing right
3028 * singular vectors of A in VT
3029 * (CWorkspace: 0)
3030 * (RWorkspace: need BDSPAC)
3031 *
3032  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3033  $ ldvt, u, ldu, cdum, 1,
3034  $ rwork( irwork ), info )
3035 *
3036  END IF
3037 *
3038  END IF
3039 *
3040  ELSE IF( wntva ) THEN
3041 *
3042  IF( wntun ) THEN
3043 *
3044 * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
3045 * N right singular vectors to be computed in VT and
3046 * no left singular vectors to be computed
3047 *
3048  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3049 *
3050 * Sufficient workspace for a fast algorithm
3051 *
3052  ir = 1
3053  IF( lwork.GE.wrkbl+lda*m ) THEN
3054 *
3055 * WORK(IR) is LDA by M
3056 *
3057  ldwrkr = lda
3058  ELSE
3059 *
3060 * WORK(IR) is M by M
3061 *
3062  ldwrkr = m
3063  END IF
3064  itau = ir + ldwrkr*m
3065  iwork = itau + m
3066 *
3067 * Compute A=L*Q, copying result to VT
3068 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3069 * (RWorkspace: 0)
3070 *
3071  CALL zgelqf( m, n, a, lda, work( itau ),
3072  $ work( iwork ), lwork-iwork+1, ierr )
3073  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3074 *
3075 * Copy L to WORK(IR), zeroing out above it
3076 *
3077  CALL zlacpy( 'L', m, m, a, lda, work( ir ),
3078  $ ldwrkr )
3079  CALL zlaset( 'U', m-1, m-1, czero, czero,
3080  $ work( ir+ldwrkr ), ldwrkr )
3081 *
3082 * Generate Q in VT
3083 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3084 * (RWorkspace: 0)
3085 *
3086  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3087  $ work( iwork ), lwork-iwork+1, ierr )
3088  ie = 1
3089  itauq = itau
3090  itaup = itauq + m
3091  iwork = itaup + m
3092 *
3093 * Bidiagonalize L in WORK(IR)
3094 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3095 * (RWorkspace: need M)
3096 *
3097  CALL zgebrd( m, m, work( ir ), ldwrkr, s,
3098  $ rwork( ie ), work( itauq ),
3099  $ work( itaup ), work( iwork ),
3100  $ lwork-iwork+1, ierr )
3101 *
3102 * Generate right bidiagonalizing vectors in WORK(IR)
3103 * (CWorkspace: need M*M+3*M-1,
3104 * prefer M*M+2*M+(M-1)*NB)
3105 * (RWorkspace: 0)
3106 *
3107  CALL zungbr( 'P', m, m, m, work( ir ), ldwrkr,
3108  $ work( itaup ), work( iwork ),
3109  $ lwork-iwork+1, ierr )
3110  irwork = ie + m
3111 *
3112 * Perform bidiagonal QR iteration, computing right
3113 * singular vectors of L in WORK(IR)
3114 * (CWorkspace: need M*M)
3115 * (RWorkspace: need BDSPAC)
3116 *
3117  CALL zbdsqr( 'U', m, m, 0, 0, s, rwork( ie ),
3118  $ work( ir ), ldwrkr, cdum, 1, cdum, 1,
3119  $ rwork( irwork ), info )
3120 *
3121 * Multiply right singular vectors of L in WORK(IR) by
3122 * Q in VT, storing result in A
3123 * (CWorkspace: need M*M)
3124 * (RWorkspace: 0)
3125 *
3126  CALL zgemm( 'N', 'N', m, n, m, cone, work( ir ),
3127  $ ldwrkr, vt, ldvt, czero, a, lda )
3128 *
3129 * Copy right singular vectors of A from A to VT
3130 *
3131  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3132 *
3133  ELSE
3134 *
3135 * Insufficient workspace for a fast algorithm
3136 *
3137  itau = 1
3138  iwork = itau + m
3139 *
3140 * Compute A=L*Q, copying result to VT
3141 * (CWorkspace: need 2*M, prefer M+M*NB)
3142 * (RWorkspace: 0)
3143 *
3144  CALL zgelqf( m, n, a, lda, work( itau ),
3145  $ work( iwork ), lwork-iwork+1, ierr )
3146  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3147 *
3148 * Generate Q in VT
3149 * (CWorkspace: need M+N, prefer M+N*NB)
3150 * (RWorkspace: 0)
3151 *
3152  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3153  $ work( iwork ), lwork-iwork+1, ierr )
3154  ie = 1
3155  itauq = itau
3156  itaup = itauq + m
3157  iwork = itaup + m
3158 *
3159 * Zero out above L in A
3160 *
3161  CALL zlaset( 'U', m-1, m-1, czero, czero,
3162  $ a( 1, 2 ), lda )
3163 *
3164 * Bidiagonalize L in A
3165 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3166 * (RWorkspace: need M)
3167 *
3168  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3169  $ work( itauq ), work( itaup ),
3170  $ work( iwork ), lwork-iwork+1, ierr )
3171 *
3172 * Multiply right bidiagonalizing vectors in A by Q
3173 * in VT
3174 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3175 * (RWorkspace: 0)
3176 *
3177  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3178  $ work( itaup ), vt, ldvt,
3179  $ work( iwork ), lwork-iwork+1, ierr )
3180  irwork = ie + m
3181 *
3182 * Perform bidiagonal QR iteration, computing right
3183 * singular vectors of A in VT
3184 * (CWorkspace: 0)
3185 * (RWorkspace: need BDSPAC)
3186 *
3187  CALL zbdsqr( 'U', m, n, 0, 0, s, rwork( ie ), vt,
3188  $ ldvt, cdum, 1, cdum, 1,
3189  $ rwork( irwork ), info )
3190 *
3191  END IF
3192 *
3193  ELSE IF( wntuo ) THEN
3194 *
3195 * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3196 * N right singular vectors to be computed in VT and
3197 * M left singular vectors to be overwritten on A
3198 *
3199  IF( lwork.GE.2*m*m+max( n+m, 3*m ) ) THEN
3200 *
3201 * Sufficient workspace for a fast algorithm
3202 *
3203  iu = 1
3204  IF( lwork.GE.wrkbl+2*lda*m ) THEN
3205 *
3206 * WORK(IU) is LDA by M and WORK(IR) is LDA by M
3207 *
3208  ldwrku = lda
3209  ir = iu + ldwrku*m
3210  ldwrkr = lda
3211  ELSE IF( lwork.GE.wrkbl+( lda+m )*m ) THEN
3212 *
3213 * WORK(IU) is LDA by M and WORK(IR) is M by M
3214 *
3215  ldwrku = lda
3216  ir = iu + ldwrku*m
3217  ldwrkr = m
3218  ELSE
3219 *
3220 * WORK(IU) is M by M and WORK(IR) is M by M
3221 *
3222  ldwrku = m
3223  ir = iu + ldwrku*m
3224  ldwrkr = m
3225  END IF
3226  itau = ir + ldwrkr*m
3227  iwork = itau + m
3228 *
3229 * Compute A=L*Q, copying result to VT
3230 * (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3231 * (RWorkspace: 0)
3232 *
3233  CALL zgelqf( m, n, a, lda, work( itau ),
3234  $ work( iwork ), lwork-iwork+1, ierr )
3235  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3236 *
3237 * Generate Q in VT
3238 * (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3239 * (RWorkspace: 0)
3240 *
3241  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3242  $ work( iwork ), lwork-iwork+1, ierr )
3243 *
3244 * Copy L to WORK(IU), zeroing out above it
3245 *
3246  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3247  $ ldwrku )
3248  CALL zlaset( 'U', m-1, m-1, czero, czero,
3249  $ work( iu+ldwrku ), ldwrku )
3250  ie = 1
3251  itauq = itau
3252  itaup = itauq + m
3253  iwork = itaup + m
3254 *
3255 * Bidiagonalize L in WORK(IU), copying result to
3256 * WORK(IR)
3257 * (CWorkspace: need 2*M*M+3*M,
3258 * prefer 2*M*M+2*M+2*M*NB)
3259 * (RWorkspace: need M)
3260 *
3261  CALL zgebrd( m, m, work( iu ), ldwrku, s,
3262  $ rwork( ie ), work( itauq ),
3263  $ work( itaup ), work( iwork ),
3264  $ lwork-iwork+1, ierr )
3265  CALL zlacpy( 'L', m, m, work( iu ), ldwrku,
3266  $ work( ir ), ldwrkr )
3267 *
3268 * Generate right bidiagonalizing vectors in WORK(IU)
3269 * (CWorkspace: need 2*M*M+3*M-1,
3270 * prefer 2*M*M+2*M+(M-1)*NB)
3271 * (RWorkspace: 0)
3272 *
3273  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3274  $ work( itaup ), work( iwork ),
3275  $ lwork-iwork+1, ierr )
3276 *
3277 * Generate left bidiagonalizing vectors in WORK(IR)
3278 * (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3279 * (RWorkspace: 0)
3280 *
3281  CALL zungbr( 'Q', m, m, m, work( ir ), ldwrkr,
3282  $ work( itauq ), work( iwork ),
3283  $ lwork-iwork+1, ierr )
3284  irwork = ie + m
3285 *
3286 * Perform bidiagonal QR iteration, computing left
3287 * singular vectors of L in WORK(IR) and computing
3288 * right singular vectors of L in WORK(IU)
3289 * (CWorkspace: need 2*M*M)
3290 * (RWorkspace: need BDSPAC)
3291 *
3292  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3293  $ work( iu ), ldwrku, work( ir ),
3294  $ ldwrkr, cdum, 1, rwork( irwork ),
3295  $ info )
3296 *
3297 * Multiply right singular vectors of L in WORK(IU) by
3298 * Q in VT, storing result in A
3299 * (CWorkspace: need M*M)
3300 * (RWorkspace: 0)
3301 *
3302  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3303  $ ldwrku, vt, ldvt, czero, a, lda )
3304 *
3305 * Copy right singular vectors of A from A to VT
3306 *
3307  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3308 *
3309 * Copy left singular vectors of A from WORK(IR) to A
3310 *
3311  CALL zlacpy( 'F', m, m, work( ir ), ldwrkr, a,
3312  $ lda )
3313 *
3314  ELSE
3315 *
3316 * Insufficient workspace for a fast algorithm
3317 *
3318  itau = 1
3319  iwork = itau + m
3320 *
3321 * Compute A=L*Q, copying result to VT
3322 * (CWorkspace: need 2*M, prefer M+M*NB)
3323 * (RWorkspace: 0)
3324 *
3325  CALL zgelqf( m, n, a, lda, work( itau ),
3326  $ work( iwork ), lwork-iwork+1, ierr )
3327  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3328 *
3329 * Generate Q in VT
3330 * (CWorkspace: need M+N, prefer M+N*NB)
3331 * (RWorkspace: 0)
3332 *
3333  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3334  $ work( iwork ), lwork-iwork+1, ierr )
3335  ie = 1
3336  itauq = itau
3337  itaup = itauq + m
3338  iwork = itaup + m
3339 *
3340 * Zero out above L in A
3341 *
3342  CALL zlaset( 'U', m-1, m-1, czero, czero,
3343  $ a( 1, 2 ), lda )
3344 *
3345 * Bidiagonalize L in A
3346 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3347 * (RWorkspace: need M)
3348 *
3349  CALL zgebrd( m, m, a, lda, s, rwork( ie ),
3350  $ work( itauq ), work( itaup ),
3351  $ work( iwork ), lwork-iwork+1, ierr )
3352 *
3353 * Multiply right bidiagonalizing vectors in A by Q
3354 * in VT
3355 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3356 * (RWorkspace: 0)
3357 *
3358  CALL zunmbr( 'P', 'L', 'C', m, n, m, a, lda,
3359  $ work( itaup ), vt, ldvt,
3360  $ work( iwork ), lwork-iwork+1, ierr )
3361 *
3362 * Generate left bidiagonalizing vectors in A
3363 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3364 * (RWorkspace: 0)
3365 *
3366  CALL zungbr( 'Q', m, m, m, a, lda, work( itauq ),
3367  $ work( iwork ), lwork-iwork+1, ierr )
3368  irwork = ie + m
3369 *
3370 * Perform bidiagonal QR iteration, computing left
3371 * singular vectors of A in A and computing right
3372 * singular vectors of A in VT
3373 * (CWorkspace: 0)
3374 * (RWorkspace: need BDSPAC)
3375 *
3376  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3377  $ ldvt, a, lda, cdum, 1,
3378  $ rwork( irwork ), info )
3379 *
3380  END IF
3381 *
3382  ELSE IF( wntuas ) THEN
3383 *
3384 * Path 9t(N much larger than M, JOBU='S' or 'A',
3385 * JOBVT='A')
3386 * N right singular vectors to be computed in VT and
3387 * M left singular vectors to be computed in U
3388 *
3389  IF( lwork.GE.m*m+max( n+m, 3*m ) ) THEN
3390 *
3391 * Sufficient workspace for a fast algorithm
3392 *
3393  iu = 1
3394  IF( lwork.GE.wrkbl+lda*m ) THEN
3395 *
3396 * WORK(IU) is LDA by M
3397 *
3398  ldwrku = lda
3399  ELSE
3400 *
3401 * WORK(IU) is M by M
3402 *
3403  ldwrku = m
3404  END IF
3405  itau = iu + ldwrku*m
3406  iwork = itau + m
3407 *
3408 * Compute A=L*Q, copying result to VT
3409 * (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3410 * (RWorkspace: 0)
3411 *
3412  CALL zgelqf( m, n, a, lda, work( itau ),
3413  $ work( iwork ), lwork-iwork+1, ierr )
3414  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3415 *
3416 * Generate Q in VT
3417 * (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3418 * (RWorkspace: 0)
3419 *
3420  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3421  $ work( iwork ), lwork-iwork+1, ierr )
3422 *
3423 * Copy L to WORK(IU), zeroing out above it
3424 *
3425  CALL zlacpy( 'L', m, m, a, lda, work( iu ),
3426  $ ldwrku )
3427  CALL zlaset( 'U', m-1, m-1, czero, czero,
3428  $ work( iu+ldwrku ), ldwrku )
3429  ie = 1
3430  itauq = itau
3431  itaup = itauq + m
3432  iwork = itaup + m
3433 *
3434 * Bidiagonalize L in WORK(IU), copying result to U
3435 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3436 * (RWorkspace: need M)
3437 *
3438  CALL zgebrd( m, m, work( iu ), ldwrku, s,
3439  $ rwork( ie ), work( itauq ),
3440  $ work( itaup ), work( iwork ),
3441  $ lwork-iwork+1, ierr )
3442  CALL zlacpy( 'L', m, m, work( iu ), ldwrku, u,
3443  $ ldu )
3444 *
3445 * Generate right bidiagonalizing vectors in WORK(IU)
3446 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3447 * (RWorkspace: 0)
3448 *
3449  CALL zungbr( 'P', m, m, m, work( iu ), ldwrku,
3450  $ work( itaup ), work( iwork ),
3451  $ lwork-iwork+1, ierr )
3452 *
3453 * Generate left bidiagonalizing vectors in U
3454 * (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3455 * (RWorkspace: 0)
3456 *
3457  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3458  $ work( iwork ), lwork-iwork+1, ierr )
3459  irwork = ie + m
3460 *
3461 * Perform bidiagonal QR iteration, computing left
3462 * singular vectors of L in U and computing right
3463 * singular vectors of L in WORK(IU)
3464 * (CWorkspace: need M*M)
3465 * (RWorkspace: need BDSPAC)
3466 *
3467  CALL zbdsqr( 'U', m, m, m, 0, s, rwork( ie ),
3468  $ work( iu ), ldwrku, u, ldu, cdum, 1,
3469  $ rwork( irwork ), info )
3470 *
3471 * Multiply right singular vectors of L in WORK(IU) by
3472 * Q in VT, storing result in A
3473 * (CWorkspace: need M*M)
3474 * (RWorkspace: 0)
3475 *
3476  CALL zgemm( 'N', 'N', m, n, m, cone, work( iu ),
3477  $ ldwrku, vt, ldvt, czero, a, lda )
3478 *
3479 * Copy right singular vectors of A from A to VT
3480 *
3481  CALL zlacpy( 'F', m, n, a, lda, vt, ldvt )
3482 *
3483  ELSE
3484 *
3485 * Insufficient workspace for a fast algorithm
3486 *
3487  itau = 1
3488  iwork = itau + m
3489 *
3490 * Compute A=L*Q, copying result to VT
3491 * (CWorkspace: need 2*M, prefer M+M*NB)
3492 * (RWorkspace: 0)
3493 *
3494  CALL zgelqf( m, n, a, lda, work( itau ),
3495  $ work( iwork ), lwork-iwork+1, ierr )
3496  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3497 *
3498 * Generate Q in VT
3499 * (CWorkspace: need M+N, prefer M+N*NB)
3500 * (RWorkspace: 0)
3501 *
3502  CALL zunglq( n, n, m, vt, ldvt, work( itau ),
3503  $ work( iwork ), lwork-iwork+1, ierr )
3504 *
3505 * Copy L to U, zeroing out above it
3506 *
3507  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3508  CALL zlaset( 'U', m-1, m-1, czero, czero,
3509  $ u( 1, 2 ), ldu )
3510  ie = 1
3511  itauq = itau
3512  itaup = itauq + m
3513  iwork = itaup + m
3514 *
3515 * Bidiagonalize L in U
3516 * (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3517 * (RWorkspace: need M)
3518 *
3519  CALL zgebrd( m, m, u, ldu, s, rwork( ie ),
3520  $ work( itauq ), work( itaup ),
3521  $ work( iwork ), lwork-iwork+1, ierr )
3522 *
3523 * Multiply right bidiagonalizing vectors in U by Q
3524 * in VT
3525 * (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3526 * (RWorkspace: 0)
3527 *
3528  CALL zunmbr( 'P', 'L', 'C', m, n, m, u, ldu,
3529  $ work( itaup ), vt, ldvt,
3530  $ work( iwork ), lwork-iwork+1, ierr )
3531 *
3532 * Generate left bidiagonalizing vectors in U
3533 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3534 * (RWorkspace: 0)
3535 *
3536  CALL zungbr( 'Q', m, m, m, u, ldu, work( itauq ),
3537  $ work( iwork ), lwork-iwork+1, ierr )
3538  irwork = ie + m
3539 *
3540 * Perform bidiagonal QR iteration, computing left
3541 * singular vectors of A in U and computing right
3542 * singular vectors of A in VT
3543 * (CWorkspace: 0)
3544 * (RWorkspace: need BDSPAC)
3545 *
3546  CALL zbdsqr( 'U', m, n, m, 0, s, rwork( ie ), vt,
3547  $ ldvt, u, ldu, cdum, 1,
3548  $ rwork( irwork ), info )
3549 *
3550  END IF
3551 *
3552  END IF
3553 *
3554  END IF
3555 *
3556  ELSE
3557 *
3558 * N .LT. MNTHR
3559 *
3560 * Path 10t(N greater than M, but not much larger)
3561 * Reduce to bidiagonal form without LQ decomposition
3562 *
3563  ie = 1
3564  itauq = 1
3565  itaup = itauq + m
3566  iwork = itaup + m
3567 *
3568 * Bidiagonalize A
3569 * (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3570 * (RWorkspace: M)
3571 *
3572  CALL zgebrd( m, n, a, lda, s, rwork( ie ), work( itauq ),
3573  $ work( itaup ), work( iwork ), lwork-iwork+1,
3574  $ ierr )
3575  IF( wntuas ) THEN
3576 *
3577 * If left singular vectors desired in U, copy result to U
3578 * and generate left bidiagonalizing vectors in U
3579 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3580 * (RWorkspace: 0)
3581 *
3582  CALL zlacpy( 'L', m, m, a, lda, u, ldu )
3583  CALL zungbr( 'Q', m, m, n, u, ldu, work( itauq ),
3584  $ work( iwork ), lwork-iwork+1, ierr )
3585  END IF
3586  IF( wntvas ) THEN
3587 *
3588 * If right singular vectors desired in VT, copy result to
3589 * VT and generate right bidiagonalizing vectors in VT
3590 * (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3591 * (RWorkspace: 0)
3592 *
3593  CALL zlacpy( 'U', m, n, a, lda, vt, ldvt )
3594  IF( wntva )
3595  $ nrvt = n
3596  IF( wntvs )
3597  $ nrvt = m
3598  CALL zungbr( 'P', nrvt, n, m, vt, ldvt, work( itaup ),
3599  $ work( iwork ), lwork-iwork+1, ierr )
3600  END IF
3601  IF( wntuo ) THEN
3602 *
3603 * If left singular vectors desired in A, generate left
3604 * bidiagonalizing vectors in A
3605 * (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3606 * (RWorkspace: 0)
3607 *
3608  CALL zungbr( 'Q', m, m, n, a, lda, work( itauq ),
3609  $ work( iwork ), lwork-iwork+1, ierr )
3610  END IF
3611  IF( wntvo ) THEN
3612 *
3613 * If right singular vectors desired in A, generate right
3614 * bidiagonalizing vectors in A
3615 * (CWorkspace: need 3*M, prefer 2*M+M*NB)
3616 * (RWorkspace: 0)
3617 *
3618  CALL zungbr( 'P', m, n, m, a, lda, work( itaup ),
3619  $ work( iwork ), lwork-iwork+1, ierr )
3620  END IF
3621  irwork = ie + m
3622  IF( wntuas .OR. wntuo )
3623  $ nru = m
3624  IF( wntun )
3625  $ nru = 0
3626  IF( wntvas .OR. wntvo )
3627  $ ncvt = n
3628  IF( wntvn )
3629  $ ncvt = 0
3630  IF( ( .NOT.wntuo ) .AND. ( .NOT.wntvo ) ) THEN
3631 *
3632 * Perform bidiagonal QR iteration, if desired, computing
3633 * left singular vectors in U and computing right singular
3634 * vectors in VT
3635 * (CWorkspace: 0)
3636 * (RWorkspace: need BDSPAC)
3637 *
3638  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3639  $ ldvt, u, ldu, cdum, 1, rwork( irwork ),
3640  $ info )
3641  ELSE IF( ( .NOT.wntuo ) .AND. wntvo ) THEN
3642 *
3643 * Perform bidiagonal QR iteration, if desired, computing
3644 * left singular vectors in U and computing right singular
3645 * vectors in A
3646 * (CWorkspace: 0)
3647 * (RWorkspace: need BDSPAC)
3648 *
3649  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), a,
3650  $ lda, u, ldu, cdum, 1, rwork( irwork ),
3651  $ info )
3652  ELSE
3653 *
3654 * Perform bidiagonal QR iteration, if desired, computing
3655 * left singular vectors in A and computing right singular
3656 * vectors in VT
3657 * (CWorkspace: 0)
3658 * (RWorkspace: need BDSPAC)
3659 *
3660  CALL zbdsqr( 'L', m, ncvt, nru, 0, s, rwork( ie ), vt,
3661  $ ldvt, a, lda, cdum, 1, rwork( irwork ),
3662  $ info )
3663  END IF
3664 *
3665  END IF
3666 *
3667  END IF
3668 *
3669 * Undo scaling if necessary
3670 *
3671  IF( iscl.EQ.1 ) THEN
3672  IF( anrm.GT.bignum )
3673  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn, 1, s, minmn,
3674  $ ierr )
3675  IF( info.NE.0 .AND. anrm.GT.bignum )
3676  $ CALL dlascl( 'G', 0, 0, bignum, anrm, minmn-1, 1,
3677  $ rwork( ie ), minmn, ierr )
3678  IF( anrm.LT.smlnum )
3679  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn, 1, s, minmn,
3680  $ ierr )
3681  IF( info.NE.0 .AND. anrm.LT.smlnum )
3682  $ CALL dlascl( 'G', 0, 0, smlnum, anrm, minmn-1, 1,
3683  $ rwork( ie ), minmn, ierr )
3684  END IF
3685 *
3686 * Return optimal workspace in WORK(1)
3687 *
3688  work( 1 ) = maxwrk
3689 *
3690  return
3691 *
3692 * End of ZGESVD
3693 *
3694  END