LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches
zgesvj.f
Go to the documentation of this file.
1*> \brief <b> ZGESVJ </b>
2*
3* =========== DOCUMENTATION ===========
4*
5* Online html documentation available at
6* http://www.netlib.org/lapack/explore-html/
7*
8*> Download ZGESVJ + dependencies
9*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgesvj.f">
10*> [TGZ]</a>
11*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgesvj.f">
12*> [ZIP]</a>
13*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgesvj.f">
14*> [TXT]</a>
15*
16* Definition:
17* ===========
18*
19* SUBROUTINE ZGESVJ( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V,
20* LDV, CWORK, LWORK, RWORK, LRWORK, INFO )
21*
22* .. Scalar Arguments ..
23* INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N
24* CHARACTER*1 JOBA, JOBU, JOBV
25* ..
26* .. Array Arguments ..
27* COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK )
28* DOUBLE PRECISION RWORK( LRWORK ), SVA( N )
29* ..
30*
31*
32*> \par Purpose:
33* =============
34*>
35*> \verbatim
36*>
37*> ZGESVJ computes the singular value decomposition (SVD) of a complex
38*> M-by-N matrix A, where M >= N. The SVD of A is written as
39*> [++] [xx] [x0] [xx]
40*> A = U * SIGMA * V^*, [++] = [xx] * [ox] * [xx]
41*> [++] [xx]
42*> where SIGMA is an N-by-N diagonal matrix, U is an M-by-N orthonormal
43*> matrix, and V is an N-by-N unitary matrix. The diagonal elements
44*> of SIGMA are the singular values of A. The columns of U and V are the
45*> left and the right singular vectors of A, respectively.
46*> \endverbatim
47*
48* Arguments:
49* ==========
50*
51*> \param[in] JOBA
52*> \verbatim
53*> JOBA is CHARACTER*1
54*> Specifies the structure of A.
55*> = 'L': The input matrix A is lower triangular;
56*> = 'U': The input matrix A is upper triangular;
57*> = 'G': The input matrix A is general M-by-N matrix, M >= N.
58*> \endverbatim
59*>
60*> \param[in] JOBU
61*> \verbatim
62*> JOBU is CHARACTER*1
63*> Specifies whether to compute the left singular vectors
64*> (columns of U):
65*> = 'U' or 'F': The left singular vectors corresponding to the nonzero
66*> singular values are computed and returned in the leading
67*> columns of A. See more details in the description of A.
68*> The default numerical orthogonality threshold is set to
69*> approximately TOL=CTOL*EPS, CTOL=SQRT(M), EPS=DLAMCH('E').
70*> = 'C': Analogous to JOBU='U', except that user can control the
71*> level of numerical orthogonality of the computed left
72*> singular vectors. TOL can be set to TOL = CTOL*EPS, where
73*> CTOL is given on input in the array WORK.
74*> No CTOL smaller than ONE is allowed. CTOL greater
75*> than 1 / EPS is meaningless. The option 'C'
76*> can be used if M*EPS is satisfactory orthogonality
77*> of the computed left singular vectors, so CTOL=M could
78*> save few sweeps of Jacobi rotations.
79*> See the descriptions of A and WORK(1).
80*> = 'N': The matrix U is not computed. However, see the
81*> description of A.
82*> \endverbatim
83*>
84*> \param[in] JOBV
85*> \verbatim
86*> JOBV is CHARACTER*1
87*> Specifies whether to compute the right singular vectors, that
88*> is, the matrix V:
89*> = 'V' or 'J': the matrix V is computed and returned in the array V
90*> = 'A': the Jacobi rotations are applied to the MV-by-N
91*> array V. In other words, the right singular vector
92*> matrix V is not computed explicitly; instead it is
93*> applied to an MV-by-N matrix initially stored in the
94*> first MV rows of V.
95*> = 'N': the matrix V is not computed and the array V is not
96*> referenced
97*> \endverbatim
98*>
99*> \param[in] M
100*> \verbatim
101*> M is INTEGER
102*> The number of rows of the input matrix A. 1/DLAMCH('E') > M >= 0.
103*> \endverbatim
104*>
105*> \param[in] N
106*> \verbatim
107*> N is INTEGER
108*> The number of columns of the input matrix A.
109*> M >= N >= 0.
110*> \endverbatim
111*>
112*> \param[in,out] A
113*> \verbatim
114*> A is COMPLEX*16 array, dimension (LDA,N)
115*> On entry, the M-by-N matrix A.
116*> On exit,
117*> If JOBU = 'U' .OR. JOBU = 'C':
118*> If INFO = 0 :
119*> RANKA orthonormal columns of U are returned in the
120*> leading RANKA columns of the array A. Here RANKA <= N
121*> is the number of computed singular values of A that are
122*> above the underflow threshold DLAMCH('S'). The singular
123*> vectors corresponding to underflowed or zero singular
124*> values are not computed. The value of RANKA is returned
125*> in the array RWORK as RANKA=NINT(RWORK(2)). Also see the
126*> descriptions of SVA and RWORK. The computed columns of U
127*> are mutually numerically orthogonal up to approximately
128*> TOL=SQRT(M)*EPS (default); or TOL=CTOL*EPS (JOBU = 'C'),
129*> see the description of JOBU.
130*> If INFO > 0,
131*> the procedure ZGESVJ did not converge in the given number
132*> of iterations (sweeps). In that case, the computed
133*> columns of U may not be orthogonal up to TOL. The output
134*> U (stored in A), SIGMA (given by the computed singular
135*> values in SVA(1:N)) and V is still a decomposition of the
136*> input matrix A in the sense that the residual
137*> || A - SCALE * U * SIGMA * V^* ||_2 / ||A||_2 is small.
138*> If JOBU = 'N':
139*> If INFO = 0 :
140*> Note that the left singular vectors are 'for free' in the
141*> one-sided Jacobi SVD algorithm. However, if only the
142*> singular values are needed, the level of numerical
143*> orthogonality of U is not an issue and iterations are
144*> stopped when the columns of the iterated matrix are
145*> numerically orthogonal up to approximately M*EPS. Thus,
146*> on exit, A contains the columns of U scaled with the
147*> corresponding singular values.
148*> If INFO > 0:
149*> the procedure ZGESVJ did not converge in the given number
150*> of iterations (sweeps).
151*> \endverbatim
152*>
153*> \param[in] LDA
154*> \verbatim
155*> LDA is INTEGER
156*> The leading dimension of the array A. LDA >= max(1,M).
157*> \endverbatim
158*>
159*> \param[out] SVA
160*> \verbatim
161*> SVA is DOUBLE PRECISION array, dimension (N)
162*> On exit,
163*> If INFO = 0 :
164*> depending on the value SCALE = RWORK(1), we have:
165*> If SCALE = ONE:
166*> SVA(1:N) contains the computed singular values of A.
167*> During the computation SVA contains the Euclidean column
168*> norms of the iterated matrices in the array A.
169*> If SCALE .NE. ONE:
170*> The singular values of A are SCALE*SVA(1:N), and this
171*> factored representation is due to the fact that some of the
172*> singular values of A might underflow or overflow.
173*>
174*> If INFO > 0:
175*> the procedure ZGESVJ did not converge in the given number of
176*> iterations (sweeps) and SCALE*SVA(1:N) may not be accurate.
177*> \endverbatim
178*>
179*> \param[in] MV
180*> \verbatim
181*> MV is INTEGER
182*> If JOBV = 'A', then the product of Jacobi rotations in ZGESVJ
183*> is applied to the first MV rows of V. See the description of JOBV.
184*> \endverbatim
185*>
186*> \param[in,out] V
187*> \verbatim
188*> V is COMPLEX*16 array, dimension (LDV,N)
189*> If JOBV = 'V', then V contains on exit the N-by-N matrix of
190*> the right singular vectors;
191*> If JOBV = 'A', then V contains the product of the computed right
192*> singular vector matrix and the initial matrix in
193*> the array V.
194*> If JOBV = 'N', then V is not referenced.
195*> \endverbatim
196*>
197*> \param[in] LDV
198*> \verbatim
199*> LDV is INTEGER
200*> The leading dimension of the array V, LDV >= 1.
201*> If JOBV = 'V', then LDV >= MAX(1,N).
202*> If JOBV = 'A', then LDV >= MAX(1,MV) .
203*> \endverbatim
204*>
205*> \param[in,out] CWORK
206*> \verbatim
207*> CWORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
208*> Used as workspace.
209*> \endverbatim
210*>
211*> \param[in] LWORK
212*> \verbatim
213*> LWORK is INTEGER.
214*> Length of CWORK.
215*> LWORK >= 1, if MIN(M,N) = 0, and LWORK >= M+N, otherwise.
216*>
217*> If on entry LWORK = -1, then a workspace query is assumed and
218*> no computation is done; CWORK(1) is set to the minial (and optimal)
219*> length of CWORK.
220*> \endverbatim
221*>
222*> \param[in,out] RWORK
223*> \verbatim
224*> RWORK is DOUBLE PRECISION array, dimension (max(6,LRWORK))
225*> On entry,
226*> If JOBU = 'C' :
227*> RWORK(1) = CTOL, where CTOL defines the threshold for convergence.
228*> The process stops if all columns of A are mutually
229*> orthogonal up to CTOL*EPS, EPS=DLAMCH('E').
230*> It is required that CTOL >= ONE, i.e. it is not
231*> allowed to force the routine to obtain orthogonality
232*> below EPSILON.
233*> On exit,
234*> RWORK(1) = SCALE is the scaling factor such that SCALE*SVA(1:N)
235*> are the computed singular values of A.
236*> (See description of SVA().)
237*> RWORK(2) = NINT(RWORK(2)) is the number of the computed nonzero
238*> singular values.
239*> RWORK(3) = NINT(RWORK(3)) is the number of the computed singular
240*> values that are larger than the underflow threshold.
241*> RWORK(4) = NINT(RWORK(4)) is the number of sweeps of Jacobi
242*> rotations needed for numerical convergence.
243*> RWORK(5) = max_{i.NE.j} |COS(A(:,i),A(:,j))| in the last sweep.
244*> This is useful information in cases when ZGESVJ did
245*> not converge, as it can be used to estimate whether
246*> the output is still useful and for post festum analysis.
247*> RWORK(6) = the largest absolute value over all sines of the
248*> Jacobi rotation angles in the last sweep. It can be
249*> useful for a post festum analysis.
250*> \endverbatim
251*>
252*> \param[in] LRWORK
253*> \verbatim
254*> LRWORK is INTEGER
255*> Length of RWORK.
256*> LRWORK >= 1, if MIN(M,N) = 0, and LRWORK >= MAX(6,N), otherwise.
257*>
258*> If on entry LRWORK = -1, then a workspace query is assumed and
259*> no computation is done; RWORK(1) is set to the minial (and optimal)
260*> length of RWORK.
261*> \endverbatim
262*>
263*> \param[out] INFO
264*> \verbatim
265*> INFO is INTEGER
266*> = 0: successful exit.
267*> < 0: if INFO = -i, then the i-th argument had an illegal value
268*> > 0: ZGESVJ did not converge in the maximal allowed number
269*> (NSWEEP=30) of sweeps. The output may still be useful.
270*> See the description of RWORK.
271*> \endverbatim
272*>
273* Authors:
274* ========
275*
276*> \author Univ. of Tennessee
277*> \author Univ. of California Berkeley
278*> \author Univ. of Colorado Denver
279*> \author NAG Ltd.
280*
281*> \ingroup gesvj
282*
283*> \par Further Details:
284* =====================
285*>
286*> \verbatim
287*>
288*> The orthogonal N-by-N matrix V is obtained as a product of Jacobi plane
289*> rotations. In the case of underflow of the tangent of the Jacobi angle, a
290*> modified Jacobi transformation of Drmac [3] is used. Pivot strategy uses
291*> column interchanges of de Rijk [1]. The relative accuracy of the computed
292*> singular values and the accuracy of the computed singular vectors (in
293*> angle metric) is as guaranteed by the theory of Demmel and Veselic [2].
294*> The condition number that determines the accuracy in the full rank case
295*> is essentially min_{D=diag} kappa(A*D), where kappa(.) is the
296*> spectral condition number. The best performance of this Jacobi SVD
297*> procedure is achieved if used in an accelerated version of Drmac and
298*> Veselic [4,5], and it is the kernel routine in the SIGMA library [6].
299*> Some tuning parameters (marked with [TP]) are available for the
300*> implementer.
301*> The computational range for the nonzero singular values is the machine
302*> number interval ( UNDERFLOW , OVERFLOW ). In extreme cases, even
303*> denormalized singular values can be computed with the corresponding
304*> gradual loss of accurate digits.
305*> \endverbatim
306*
307*> \par Contributor:
308* ==================
309*>
310*> \verbatim
311*>
312*> ============
313*>
314*> Zlatko Drmac (Zagreb, Croatia)
315*>
316*> \endverbatim
317*
318*> \par References:
319* ================
320*>
321*> \verbatim
322*>
323*> [1] P. P. M. De Rijk: A one-sided Jacobi algorithm for computing the
324*> singular value decomposition on a vector computer.
325*> SIAM J. Sci. Stat. Comp., Vol. 10 (1998), pp. 359-371.
326*> [2] J. Demmel and K. Veselic: Jacobi method is more accurate than QR.
327*> [3] Z. Drmac: Implementation of Jacobi rotations for accurate singular
328*> value computation in floating point arithmetic.
329*> SIAM J. Sci. Comp., Vol. 18 (1997), pp. 1200-1222.
330*> [4] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
331*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
332*> LAPACK Working note 169.
333*> [5] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
334*> SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
335*> LAPACK Working note 170.
336*> [6] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
337*> QSVD, (H,K)-SVD computations.
338*> Department of Mathematics, University of Zagreb, 2008, 2015.
339*> \endverbatim
340*
341*> \par Bugs, examples and comments:
342* =================================
343*>
344*> \verbatim
345*> ===========================
346*> Please report all bugs and send interesting test examples and comments to
347*> drmac@math.hr. Thank you.
348*> \endverbatim
349*>
350* =====================================================================
351 SUBROUTINE zgesvj( JOBA, JOBU, JOBV, M, N, A, LDA, SVA, MV, V,
352 $ LDV, CWORK, LWORK, RWORK, LRWORK, INFO )
353*
354* -- LAPACK computational routine --
355* -- LAPACK is a software package provided by Univ. of Tennessee, --
356* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
357*
358 IMPLICIT NONE
359* .. Scalar Arguments ..
360 INTEGER INFO, LDA, LDV, LWORK, LRWORK, M, MV, N
361 CHARACTER*1 JOBA, JOBU, JOBV
362* ..
363* .. Array Arguments ..
364 COMPLEX*16 A( LDA, * ), V( LDV, * ), CWORK( LWORK )
365 DOUBLE PRECISION RWORK( LRWORK ), SVA( N )
366* ..
367*
368* =====================================================================
369*
370* .. Local Parameters ..
371 DOUBLE PRECISION ZERO, HALF, ONE
372 parameter( zero = 0.0d0, half = 0.5d0, one = 1.0d0)
373 COMPLEX*16 CZERO, CONE
374 parameter( czero = (0.0d0, 0.0d0), cone = (1.0d0, 0.0d0) )
375 INTEGER NSWEEP
376 parameter( nsweep = 30 )
377* ..
378* .. Local Scalars ..
379 COMPLEX*16 AAPQ, OMPQ
380 DOUBLE PRECISION AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG,
381 $ bigtheta, cs, ctol, epsln, mxaapq,
382 $ mxsinj, rootbig, rooteps, rootsfmin, roottol,
383 $ skl, sfmin, small, sn, t, temp1, theta, thsign,
384 $ tol
385 INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1,
386 $ iswrot, jbc, jgl, kbl, lkahead, mvl, n2, n34,
387 $ n4, nbl, notrot, p, pskipped, q, rowskip,
388 $ swband, minmn, lwmin, lrwmin
389 LOGICAL APPLV, GOSCALE, LOWER, LQUERY, LSVEC, NOSCALE,
390 $ rotok, rsvec, uctol, upper
391* ..
392* ..
393* .. Intrinsic Functions ..
394 INTRINSIC abs, max, min, conjg, dble, sign, sqrt
395* ..
396* .. External Functions ..
397* ..
398* from BLAS
399 DOUBLE PRECISION DZNRM2
400 COMPLEX*16 ZDOTC
401 EXTERNAL zdotc, dznrm2
402 INTEGER IDAMAX
403 EXTERNAL idamax
404* from LAPACK
405 DOUBLE PRECISION DLAMCH
406 EXTERNAL dlamch
407 LOGICAL LSAME
408 EXTERNAL lsame
409* ..
410* .. External Subroutines ..
411* ..
412* from BLAS
413 EXTERNAL zcopy, zrot, zdscal, zswap, zaxpy
414* from LAPACK
415 EXTERNAL dlascl, zlascl, zlaset, zlassq,
416 $ xerbla
417 EXTERNAL zgsvj0, zgsvj1
418* ..
419* .. Executable Statements ..
420*
421* Test the input arguments
422*
423 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
424 uctol = lsame( jobu, 'C' )
425 rsvec = lsame( jobv, 'V' ) .OR. lsame( jobv, 'J' )
426 applv = lsame( jobv, 'A' )
427 upper = lsame( joba, 'U' )
428 lower = lsame( joba, 'L' )
429*
430 minmn = min( m, n )
431 IF( minmn.EQ.0 ) THEN
432 lwmin = 1
433 lrwmin = 1
434 ELSE
435 lwmin = m+n
436 lrwmin = max( 6, n )
437 END IF
438*
439 lquery = ( lwork.EQ.-1 ) .OR. ( lrwork.EQ.-1 )
440 IF( .NOT.( upper .OR. lower .OR. lsame( joba, 'G' ) ) ) THEN
441 info = -1
442 ELSE IF( .NOT.( lsvec .OR.
443 $ uctol .OR.
444 $ lsame( jobu, 'N' ) ) ) THEN
445 info = -2
446 ELSE IF( .NOT.( rsvec .OR.
447 $ applv .OR.
448 $ lsame( jobv, 'N' ) ) ) THEN
449 info = -3
450 ELSE IF( m.LT.0 ) THEN
451 info = -4
452 ELSE IF( ( n.LT.0 ) .OR. ( n.GT.m ) ) THEN
453 info = -5
454 ELSE IF( lda.LT.m ) THEN
455 info = -7
456 ELSE IF( mv.LT.0 ) THEN
457 info = -9
458 ELSE IF( ( rsvec .AND. ( ldv.LT.n ) ) .OR.
459 $ ( applv .AND. ( ldv.LT.mv ) ) ) THEN
460 info = -11
461 ELSE IF( uctol .AND. ( rwork( 1 ).LE.one ) ) THEN
462 info = -12
463 ELSE IF( lwork.LT.lwmin .AND. ( .NOT.lquery ) ) THEN
464 info = -13
465 ELSE IF( lrwork.LT.lrwmin .AND. ( .NOT.lquery ) ) THEN
466 info = -15
467 ELSE
468 info = 0
469 END IF
470*
471* #:(
472 IF( info.NE.0 ) THEN
473 CALL xerbla( 'ZGESVJ', -info )
474 RETURN
475 ELSE IF( lquery ) THEN
476 cwork( 1 ) = lwmin
477 rwork( 1 ) = real( lrwmin )
478 RETURN
479 END IF
480*
481* #:) Quick return for void matrix
482*
483 IF( minmn.EQ.0 ) RETURN
484*
485* Set numerical parameters
486* The stopping criterion for Jacobi rotations is
487*
488* max_{i<>j}|A(:,i)^* * A(:,j)| / (||A(:,i)||*||A(:,j)||) < CTOL*EPS
489*
490* where EPS is the round-off and CTOL is defined as follows:
491*
492 IF( uctol ) THEN
493* ... user controlled
494 ctol = rwork( 1 )
495 ELSE
496* ... default
497 IF( lsvec .OR. rsvec .OR. applv ) THEN
498 ctol = sqrt( dble( m ) )
499 ELSE
500 ctol = dble( m )
501 END IF
502 END IF
503* ... and the machine dependent parameters are
504*[!] (Make sure that SLAMCH() works properly on the target machine.)
505*
506 epsln = dlamch( 'Epsilon' )
507 rooteps = sqrt( epsln )
508 sfmin = dlamch( 'SafeMinimum' )
509 rootsfmin = sqrt( sfmin )
510 small = sfmin / epsln
511 big = dlamch( 'Overflow' )
512* BIG = ONE / SFMIN
513 rootbig = one / rootsfmin
514* LARGE = BIG / SQRT( DBLE( M*N ) )
515 bigtheta = one / rooteps
516*
517 tol = ctol*epsln
518 roottol = sqrt( tol )
519*
520 IF( dble( m )*epsln.GE.one ) THEN
521 info = -4
522 CALL xerbla( 'ZGESVJ', -info )
523 RETURN
524 END IF
525*
526* Initialize the right singular vector matrix.
527*
528 IF( rsvec ) THEN
529 mvl = n
530 CALL zlaset( 'A', mvl, n, czero, cone, v, ldv )
531 ELSE IF( applv ) THEN
532 mvl = mv
533 END IF
534 rsvec = rsvec .OR. applv
535*
536* Initialize SVA( 1:N ) = ( ||A e_i||_2, i = 1:N )
537*(!) If necessary, scale A to protect the largest singular value
538* from overflow. It is possible that saving the largest singular
539* value destroys the information about the small ones.
540* This initial scaling is almost minimal in the sense that the
541* goal is to make sure that no column norm overflows, and that
542* SQRT(N)*max_i SVA(i) does not overflow. If INFinite entries
543* in A are detected, the procedure returns with INFO=-6.
544*
545 skl = one / sqrt( dble( m )*dble( n ) )
546 noscale = .true.
547 goscale = .true.
548*
549 IF( lower ) THEN
550* the input matrix is M-by-N lower triangular (trapezoidal)
551 DO 1874 p = 1, n
552 aapp = zero
553 aaqq = one
554 CALL zlassq( m-p+1, a( p, p ), 1, aapp, aaqq )
555 IF( aapp.GT.big ) THEN
556 info = -6
557 CALL xerbla( 'ZGESVJ', -info )
558 RETURN
559 END IF
560 aaqq = sqrt( aaqq )
561 IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
562 sva( p ) = aapp*aaqq
563 ELSE
564 noscale = .false.
565 sva( p ) = aapp*( aaqq*skl )
566 IF( goscale ) THEN
567 goscale = .false.
568 DO 1873 q = 1, p - 1
569 sva( q ) = sva( q )*skl
570 1873 CONTINUE
571 END IF
572 END IF
573 1874 CONTINUE
574 ELSE IF( upper ) THEN
575* the input matrix is M-by-N upper triangular (trapezoidal)
576 DO 2874 p = 1, n
577 aapp = zero
578 aaqq = one
579 CALL zlassq( p, a( 1, p ), 1, aapp, aaqq )
580 IF( aapp.GT.big ) THEN
581 info = -6
582 CALL xerbla( 'ZGESVJ', -info )
583 RETURN
584 END IF
585 aaqq = sqrt( aaqq )
586 IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
587 sva( p ) = aapp*aaqq
588 ELSE
589 noscale = .false.
590 sva( p ) = aapp*( aaqq*skl )
591 IF( goscale ) THEN
592 goscale = .false.
593 DO 2873 q = 1, p - 1
594 sva( q ) = sva( q )*skl
595 2873 CONTINUE
596 END IF
597 END IF
598 2874 CONTINUE
599 ELSE
600* the input matrix is M-by-N general dense
601 DO 3874 p = 1, n
602 aapp = zero
603 aaqq = one
604 CALL zlassq( m, a( 1, p ), 1, aapp, aaqq )
605 IF( aapp.GT.big ) THEN
606 info = -6
607 CALL xerbla( 'ZGESVJ', -info )
608 RETURN
609 END IF
610 aaqq = sqrt( aaqq )
611 IF( ( aapp.LT.( big / aaqq ) ) .AND. noscale ) THEN
612 sva( p ) = aapp*aaqq
613 ELSE
614 noscale = .false.
615 sva( p ) = aapp*( aaqq*skl )
616 IF( goscale ) THEN
617 goscale = .false.
618 DO 3873 q = 1, p - 1
619 sva( q ) = sva( q )*skl
620 3873 CONTINUE
621 END IF
622 END IF
623 3874 CONTINUE
624 END IF
625*
626 IF( noscale )skl = one
627*
628* Move the smaller part of the spectrum from the underflow threshold
629*(!) Start by determining the position of the nonzero entries of the
630* array SVA() relative to ( SFMIN, BIG ).
631*
632 aapp = zero
633 aaqq = big
634 DO 4781 p = 1, n
635 IF( sva( p ).NE.zero )aaqq = min( aaqq, sva( p ) )
636 aapp = max( aapp, sva( p ) )
637 4781 CONTINUE
638*
639* #:) Quick return for zero matrix
640*
641 IF( aapp.EQ.zero ) THEN
642 IF( lsvec )CALL zlaset( 'G', m, n, czero, cone, a, lda )
643 rwork( 1 ) = one
644 rwork( 2 ) = zero
645 rwork( 3 ) = zero
646 rwork( 4 ) = zero
647 rwork( 5 ) = zero
648 rwork( 6 ) = zero
649 RETURN
650 END IF
651*
652* #:) Quick return for one-column matrix
653*
654 IF( n.EQ.1 ) THEN
655 IF( lsvec )CALL zlascl( 'G', 0, 0, sva( 1 ), skl, m, 1,
656 $ a( 1, 1 ), lda, ierr )
657 rwork( 1 ) = one / skl
658 IF( sva( 1 ).GE.sfmin ) THEN
659 rwork( 2 ) = one
660 ELSE
661 rwork( 2 ) = zero
662 END IF
663 rwork( 3 ) = zero
664 rwork( 4 ) = zero
665 rwork( 5 ) = zero
666 rwork( 6 ) = zero
667 RETURN
668 END IF
669*
670* Protect small singular values from underflow, and try to
671* avoid underflows/overflows in computing Jacobi rotations.
672*
673 sn = sqrt( sfmin / epsln )
674 temp1 = sqrt( big / dble( n ) )
675 IF( ( aapp.LE.sn ) .OR. ( aaqq.GE.temp1 ) .OR.
676 $ ( ( sn.LE.aaqq ) .AND. ( aapp.LE.temp1 ) ) ) THEN
677 temp1 = min( big, temp1 / aapp )
678* AAQQ = AAQQ*TEMP1
679* AAPP = AAPP*TEMP1
680 ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.LE.temp1 ) ) THEN
681 temp1 = min( sn / aaqq, big / (aapp*sqrt( dble(n)) ) )
682* AAQQ = AAQQ*TEMP1
683* AAPP = AAPP*TEMP1
684 ELSE IF( ( aaqq.GE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
685 temp1 = max( sn / aaqq, temp1 / aapp )
686* AAQQ = AAQQ*TEMP1
687* AAPP = AAPP*TEMP1
688 ELSE IF( ( aaqq.LE.sn ) .AND. ( aapp.GE.temp1 ) ) THEN
689 temp1 = min( sn / aaqq, big / ( sqrt( dble( n ) )*aapp ) )
690* AAQQ = AAQQ*TEMP1
691* AAPP = AAPP*TEMP1
692 ELSE
693 temp1 = one
694 END IF
695*
696* Scale, if necessary
697*
698 IF( temp1.NE.one ) THEN
699 CALL dlascl( 'G', 0, 0, one, temp1, n, 1, sva, n, ierr )
700 END IF
701 skl = temp1*skl
702 IF( skl.NE.one ) THEN
703 CALL zlascl( joba, 0, 0, one, skl, m, n, a, lda, ierr )
704 skl = one / skl
705 END IF
706*
707* Row-cyclic Jacobi SVD algorithm with column pivoting
708*
709 emptsw = ( n*( n-1 ) ) / 2
710 notrot = 0
711
712 DO 1868 q = 1, n
713 cwork( q ) = cone
714 1868 CONTINUE
715*
716*
717*
718 swband = 3
719*[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective
720* if ZGESVJ is used as a computational routine in the preconditioned
721* Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure
722* works on pivots inside a band-like region around the diagonal.
723* The boundaries are determined dynamically, based on the number of
724* pivots above a threshold.
725*
726 kbl = min( 8, n )
727*[TP] KBL is a tuning parameter that defines the tile size in the
728* tiling of the p-q loops of pivot pairs. In general, an optimal
729* value of KBL depends on the matrix dimensions and on the
730* parameters of the computer's memory.
731*
732 nbl = n / kbl
733 IF( ( nbl*kbl ).NE.n )nbl = nbl + 1
734*
735 blskip = kbl**2
736*[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
737*
738 rowskip = min( 5, kbl )
739*[TP] ROWSKIP is a tuning parameter.
740*
741 lkahead = 1
742*[TP] LKAHEAD is a tuning parameter.
743*
744* Quasi block transformations, using the lower (upper) triangular
745* structure of the input matrix. The quasi-block-cycling usually
746* invokes cubic convergence. Big part of this cycle is done inside
747* canonical subspaces of dimensions less than M.
748*
749 IF( ( lower .OR. upper ) .AND. ( n.GT.max( 64, 4*kbl ) ) ) THEN
750*[TP] The number of partition levels and the actual partition are
751* tuning parameters.
752 n4 = n / 4
753 n2 = n / 2
754 n34 = 3*n4
755 IF( applv ) THEN
756 q = 0
757 ELSE
758 q = 1
759 END IF
760*
761 IF( lower ) THEN
762*
763* This works very well on lower triangular matrices, in particular
764* in the framework of the preconditioned Jacobi SVD (xGEJSV).
765* The idea is simple:
766* [+ 0 0 0] Note that Jacobi transformations of [0 0]
767* [+ + 0 0] [0 0]
768* [+ + x 0] actually work on [x 0] [x 0]
769* [+ + x x] [x x]. [x x]
770*
771 CALL zgsvj0( jobv, m-n34, n-n34, a( n34+1, n34+1 ), lda,
772 $ cwork( n34+1 ), sva( n34+1 ), mvl,
773 $ v( n34*q+1, n34+1 ), ldv, epsln, sfmin, tol,
774 $ 2, cwork( n+1 ), lwork-n, ierr )
775
776 CALL zgsvj0( jobv, m-n2, n34-n2, a( n2+1, n2+1 ), lda,
777 $ cwork( n2+1 ), sva( n2+1 ), mvl,
778 $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 2,
779 $ cwork( n+1 ), lwork-n, ierr )
780
781 CALL zgsvj1( jobv, m-n2, n-n2, n4, a( n2+1, n2+1 ), lda,
782 $ cwork( n2+1 ), sva( n2+1 ), mvl,
783 $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
784 $ cwork( n+1 ), lwork-n, ierr )
785
786 CALL zgsvj0( jobv, m-n4, n2-n4, a( n4+1, n4+1 ), lda,
787 $ cwork( n4+1 ), sva( n4+1 ), mvl,
788 $ v( n4*q+1, n4+1 ), ldv, epsln, sfmin, tol, 1,
789 $ cwork( n+1 ), lwork-n, ierr )
790*
791 CALL zgsvj0( jobv, m, n4, a, lda, cwork, sva, mvl, v,
792 $ ldv,
793 $ epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,
794 $ ierr )
795*
796 CALL zgsvj1( jobv, m, n2, n4, a, lda, cwork, sva, mvl, v,
797 $ ldv, epsln, sfmin, tol, 1, cwork( n+1 ),
798 $ lwork-n, ierr )
799*
800*
801 ELSE IF( upper ) THEN
802*
803*
804 CALL zgsvj0( jobv, n4, n4, a, lda, cwork, sva, mvl, v,
805 $ ldv,
806 $ epsln, sfmin, tol, 2, cwork( n+1 ), lwork-n,
807 $ ierr )
808*
809 CALL zgsvj0( jobv, n2, n4, a( 1, n4+1 ), lda,
810 $ cwork( n4+1 ),
811 $ sva( n4+1 ), mvl, v( n4*q+1, n4+1 ), ldv,
812 $ epsln, sfmin, tol, 1, cwork( n+1 ), lwork-n,
813 $ ierr )
814*
815 CALL zgsvj1( jobv, n2, n2, n4, a, lda, cwork, sva, mvl,
816 $ v,
817 $ ldv, epsln, sfmin, tol, 1, cwork( n+1 ),
818 $ lwork-n, ierr )
819*
820 CALL zgsvj0( jobv, n2+n4, n4, a( 1, n2+1 ), lda,
821 $ cwork( n2+1 ), sva( n2+1 ), mvl,
822 $ v( n2*q+1, n2+1 ), ldv, epsln, sfmin, tol, 1,
823 $ cwork( n+1 ), lwork-n, ierr )
824
825 END IF
826*
827 END IF
828*
829* .. Row-cyclic pivot strategy with de Rijk's pivoting ..
830*
831 DO 1993 i = 1, nsweep
832*
833* .. go go go ...
834*
835 mxaapq = zero
836 mxsinj = zero
837 iswrot = 0
838*
839 notrot = 0
840 pskipped = 0
841*
842* Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs
843* 1 <= p < q <= N. This is the first step toward a blocked implementation
844* of the rotations. New implementation, based on block transformations,
845* is under development.
846*
847 DO 2000 ibr = 1, nbl
848*
849 igl = ( ibr-1 )*kbl + 1
850*
851 DO 1002 ir1 = 0, min( lkahead, nbl-ibr )
852*
853 igl = igl + ir1*kbl
854*
855 DO 2001 p = igl, min( igl+kbl-1, n-1 )
856*
857* .. de Rijk's pivoting
858*
859 q = idamax( n-p+1, sva( p ), 1 ) + p - 1
860 IF( p.NE.q ) THEN
861 CALL zswap( m, a( 1, p ), 1, a( 1, q ), 1 )
862 IF( rsvec )CALL zswap( mvl, v( 1, p ), 1,
863 $ v( 1, q ), 1 )
864 temp1 = sva( p )
865 sva( p ) = sva( q )
866 sva( q ) = temp1
867 aapq = cwork(p)
868 cwork(p) = cwork(q)
869 cwork(q) = aapq
870 END IF
871*
872 IF( ir1.EQ.0 ) THEN
873*
874* Column norms are periodically updated by explicit
875* norm computation.
876*[!] Caveat:
877* Unfortunately, some BLAS implementations compute DZNRM2(M,A(1,p),1)
878* as SQRT(S=CDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to
879* overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to
880* underflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
881* Hence, DZNRM2 cannot be trusted, not even in the case when
882* the true norm is far from the under(over)flow boundaries.
883* If properly implemented SCNRM2 is available, the IF-THEN-ELSE-END IF
884* below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )".
885*
886 IF( ( sva( p ).LT.rootbig ) .AND.
887 $ ( sva( p ).GT.rootsfmin ) ) THEN
888 sva( p ) = dznrm2( m, a( 1, p ), 1 )
889 ELSE
890 temp1 = zero
891 aapp = one
892 CALL zlassq( m, a( 1, p ), 1, temp1, aapp )
893 sva( p ) = temp1*sqrt( aapp )
894 END IF
895 aapp = sva( p )
896 ELSE
897 aapp = sva( p )
898 END IF
899*
900 IF( aapp.GT.zero ) THEN
901*
902 pskipped = 0
903*
904 DO 2002 q = p + 1, min( igl+kbl-1, n )
905*
906 aaqq = sva( q )
907*
908 IF( aaqq.GT.zero ) THEN
909*
910 aapp0 = aapp
911 IF( aaqq.GE.one ) THEN
912 rotok = ( small*aapp ).LE.aaqq
913 IF( aapp.LT.( big / aaqq ) ) THEN
914 aapq = ( zdotc( m, a( 1, p ), 1,
915 $ a( 1, q ), 1 ) / aaqq ) / aapp
916 ELSE
917 CALL zcopy( m, a( 1, p ), 1,
918 $ cwork(n+1), 1 )
919 CALL zlascl( 'G', 0, 0, aapp, one,
920 $ m, 1, cwork(n+1), lda, ierr )
921 aapq = zdotc( m, cwork(n+1), 1,
922 $ a( 1, q ), 1 ) / aaqq
923 END IF
924 ELSE
925 rotok = aapp.LE.( aaqq / small )
926 IF( aapp.GT.( small / aaqq ) ) THEN
927 aapq = ( zdotc( m, a( 1, p ), 1,
928 $ a( 1, q ), 1 ) / aapp ) / aaqq
929 ELSE
930 CALL zcopy( m, a( 1, q ), 1,
931 $ cwork(n+1), 1 )
932 CALL zlascl( 'G', 0, 0, aaqq,
933 $ one, m, 1,
934 $ cwork(n+1), lda, ierr )
935 aapq = zdotc( m, a(1, p ), 1,
936 $ cwork(n+1), 1 ) / aapp
937 END IF
938 END IF
939*
940
941* AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q)
942 aapq1 = -abs(aapq)
943 mxaapq = max( mxaapq, -aapq1 )
944*
945* TO rotate or NOT to rotate, THAT is the question ...
946*
947 IF( abs( aapq1 ).GT.tol ) THEN
948 ompq = aapq / abs(aapq)
949*
950* .. rotate
951*[RTD] ROTATED = ROTATED + ONE
952*
953 IF( ir1.EQ.0 ) THEN
954 notrot = 0
955 pskipped = 0
956 iswrot = iswrot + 1
957 END IF
958*
959 IF( rotok ) THEN
960*
961 aqoap = aaqq / aapp
962 apoaq = aapp / aaqq
963 theta = -half*abs( aqoap-apoaq )/aapq1
964*
965 IF( abs( theta ).GT.bigtheta ) THEN
966*
967 t = half / theta
968 cs = one
969
970 CALL zrot( m, a(1,p), 1, a(1,q),
971 $ 1,
972 $ cs, conjg(ompq)*t )
973 IF ( rsvec ) THEN
974 CALL zrot( mvl, v(1,p), 1,
975 $ v(1,q), 1, cs, conjg(ompq)*t )
976 END IF
977
978 sva( q ) = aaqq*sqrt( max( zero,
979 $ one+t*apoaq*aapq1 ) )
980 aapp = aapp*sqrt( max( zero,
981 $ one-t*aqoap*aapq1 ) )
982 mxsinj = max( mxsinj, abs( t ) )
983*
984 ELSE
985*
986* .. choose correct signum for THETA and rotate
987*
988 thsign = -sign( one, aapq1 )
989 t = one / ( theta+thsign*
990 $ sqrt( one+theta*theta ) )
991 cs = sqrt( one / ( one+t*t ) )
992 sn = t*cs
993*
994 mxsinj = max( mxsinj, abs( sn ) )
995 sva( q ) = aaqq*sqrt( max( zero,
996 $ one+t*apoaq*aapq1 ) )
997 aapp = aapp*sqrt( max( zero,
998 $ one-t*aqoap*aapq1 ) )
999*
1000 CALL zrot( m, a(1,p), 1, a(1,q),
1001 $ 1,
1002 $ cs, conjg(ompq)*sn )
1003 IF ( rsvec ) THEN
1004 CALL zrot( mvl, v(1,p), 1,
1005 $ v(1,q), 1, cs, conjg(ompq)*sn )
1006 END IF
1007 END IF
1008 cwork(p) = -cwork(q) * ompq
1009*
1010 ELSE
1011* .. have to use modified Gram-Schmidt like transformation
1012 CALL zcopy( m, a( 1, p ), 1,
1013 $ cwork(n+1), 1 )
1014 CALL zlascl( 'G', 0, 0, aapp, one,
1015 $ m,
1016 $ 1, cwork(n+1), lda,
1017 $ ierr )
1018 CALL zlascl( 'G', 0, 0, aaqq, one,
1019 $ m,
1020 $ 1, a( 1, q ), lda, ierr )
1021 CALL zaxpy( m, -aapq, cwork(n+1), 1,
1022 $ a( 1, q ), 1 )
1023 CALL zlascl( 'G', 0, 0, one, aaqq,
1024 $ m,
1025 $ 1, a( 1, q ), lda, ierr )
1026 sva( q ) = aaqq*sqrt( max( zero,
1027 $ one-aapq1*aapq1 ) )
1028 mxsinj = max( mxsinj, sfmin )
1029 END IF
1030* END IF ROTOK THEN ... ELSE
1031*
1032* In the case of cancellation in updating SVA(q), SVA(p)
1033* recompute SVA(q), SVA(p).
1034*
1035 IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1036 $ THEN
1037 IF( ( aaqq.LT.rootbig ) .AND.
1038 $ ( aaqq.GT.rootsfmin ) ) THEN
1039 sva( q ) = dznrm2( m, a( 1, q ),
1040 $ 1 )
1041 ELSE
1042 t = zero
1043 aaqq = one
1044 CALL zlassq( m, a( 1, q ), 1, t,
1045 $ aaqq )
1046 sva( q ) = t*sqrt( aaqq )
1047 END IF
1048 END IF
1049 IF( ( aapp / aapp0 ).LE.rooteps ) THEN
1050 IF( ( aapp.LT.rootbig ) .AND.
1051 $ ( aapp.GT.rootsfmin ) ) THEN
1052 aapp = dznrm2( m, a( 1, p ), 1 )
1053 ELSE
1054 t = zero
1055 aapp = one
1056 CALL zlassq( m, a( 1, p ), 1, t,
1057 $ aapp )
1058 aapp = t*sqrt( aapp )
1059 END IF
1060 sva( p ) = aapp
1061 END IF
1062*
1063 ELSE
1064* A(:,p) and A(:,q) already numerically orthogonal
1065 IF( ir1.EQ.0 )notrot = notrot + 1
1066*[RTD] SKIPPED = SKIPPED + 1
1067 pskipped = pskipped + 1
1068 END IF
1069 ELSE
1070* A(:,q) is zero column
1071 IF( ir1.EQ.0 )notrot = notrot + 1
1072 pskipped = pskipped + 1
1073 END IF
1074*
1075 IF( ( i.LE.swband ) .AND.
1076 $ ( pskipped.GT.rowskip ) ) THEN
1077 IF( ir1.EQ.0 )aapp = -aapp
1078 notrot = 0
1079 GO TO 2103
1080 END IF
1081*
1082 2002 CONTINUE
1083* END q-LOOP
1084*
1085 2103 CONTINUE
1086* bailed out of q-loop
1087*
1088 sva( p ) = aapp
1089*
1090 ELSE
1091 sva( p ) = aapp
1092 IF( ( ir1.EQ.0 ) .AND. ( aapp.EQ.zero ) )
1093 $ notrot = notrot + min( igl+kbl-1, n ) - p
1094 END IF
1095*
1096 2001 CONTINUE
1097* end of the p-loop
1098* end of doing the block ( ibr, ibr )
1099 1002 CONTINUE
1100* end of ir1-loop
1101*
1102* ... go to the off diagonal blocks
1103*
1104 igl = ( ibr-1 )*kbl + 1
1105*
1106 DO 2010 jbc = ibr + 1, nbl
1107*
1108 jgl = ( jbc-1 )*kbl + 1
1109*
1110* doing the block at ( ibr, jbc )
1111*
1112 ijblsk = 0
1113 DO 2100 p = igl, min( igl+kbl-1, n )
1114*
1115 aapp = sva( p )
1116 IF( aapp.GT.zero ) THEN
1117*
1118 pskipped = 0
1119*
1120 DO 2200 q = jgl, min( jgl+kbl-1, n )
1121*
1122 aaqq = sva( q )
1123 IF( aaqq.GT.zero ) THEN
1124 aapp0 = aapp
1125*
1126* .. M x 2 Jacobi SVD ..
1127*
1128* Safe Gram matrix computation
1129*
1130 IF( aaqq.GE.one ) THEN
1131 IF( aapp.GE.aaqq ) THEN
1132 rotok = ( small*aapp ).LE.aaqq
1133 ELSE
1134 rotok = ( small*aaqq ).LE.aapp
1135 END IF
1136 IF( aapp.LT.( big / aaqq ) ) THEN
1137 aapq = ( zdotc( m, a( 1, p ), 1,
1138 $ a( 1, q ), 1 ) / aaqq ) / aapp
1139 ELSE
1140 CALL zcopy( m, a( 1, p ), 1,
1141 $ cwork(n+1), 1 )
1142 CALL zlascl( 'G', 0, 0, aapp,
1143 $ one, m, 1,
1144 $ cwork(n+1), lda, ierr )
1145 aapq = zdotc( m, cwork(n+1), 1,
1146 $ a( 1, q ), 1 ) / aaqq
1147 END IF
1148 ELSE
1149 IF( aapp.GE.aaqq ) THEN
1150 rotok = aapp.LE.( aaqq / small )
1151 ELSE
1152 rotok = aaqq.LE.( aapp / small )
1153 END IF
1154 IF( aapp.GT.( small / aaqq ) ) THEN
1155 aapq = ( zdotc( m, a( 1, p ), 1,
1156 $ a( 1, q ), 1 ) / max(aaqq,aapp) )
1157 $ / min(aaqq,aapp)
1158 ELSE
1159 CALL zcopy( m, a( 1, q ), 1,
1160 $ cwork(n+1), 1 )
1161 CALL zlascl( 'G', 0, 0, aaqq,
1162 $ one, m, 1,
1163 $ cwork(n+1), lda, ierr )
1164 aapq = zdotc( m, a( 1, p ), 1,
1165 $ cwork(n+1), 1 ) / aapp
1166 END IF
1167 END IF
1168*
1169
1170* AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q)
1171 aapq1 = -abs(aapq)
1172 mxaapq = max( mxaapq, -aapq1 )
1173*
1174* TO rotate or NOT to rotate, THAT is the question ...
1175*
1176 IF( abs( aapq1 ).GT.tol ) THEN
1177 ompq = aapq / abs(aapq)
1178 notrot = 0
1179*[RTD] ROTATED = ROTATED + 1
1180 pskipped = 0
1181 iswrot = iswrot + 1
1182*
1183 IF( rotok ) THEN
1184*
1185 aqoap = aaqq / aapp
1186 apoaq = aapp / aaqq
1187 theta = -half*abs( aqoap-apoaq )/ aapq1
1188 IF( aaqq.GT.aapp0 )theta = -theta
1189*
1190 IF( abs( theta ).GT.bigtheta ) THEN
1191 t = half / theta
1192 cs = one
1193 CALL zrot( m, a(1,p), 1, a(1,q),
1194 $ 1,
1195 $ cs, conjg(ompq)*t )
1196 IF( rsvec ) THEN
1197 CALL zrot( mvl, v(1,p), 1,
1198 $ v(1,q), 1, cs, conjg(ompq)*t )
1199 END IF
1200 sva( q ) = aaqq*sqrt( max( zero,
1201 $ one+t*apoaq*aapq1 ) )
1202 aapp = aapp*sqrt( max( zero,
1203 $ one-t*aqoap*aapq1 ) )
1204 mxsinj = max( mxsinj, abs( t ) )
1205 ELSE
1206*
1207* .. choose correct signum for THETA and rotate
1208*
1209 thsign = -sign( one, aapq1 )
1210 IF( aaqq.GT.aapp0 )thsign = -thsign
1211 t = one / ( theta+thsign*
1212 $ sqrt( one+theta*theta ) )
1213 cs = sqrt( one / ( one+t*t ) )
1214 sn = t*cs
1215 mxsinj = max( mxsinj, abs( sn ) )
1216 sva( q ) = aaqq*sqrt( max( zero,
1217 $ one+t*apoaq*aapq1 ) )
1218 aapp = aapp*sqrt( max( zero,
1219 $ one-t*aqoap*aapq1 ) )
1220*
1221 CALL zrot( m, a(1,p), 1, a(1,q),
1222 $ 1,
1223 $ cs, conjg(ompq)*sn )
1224 IF( rsvec ) THEN
1225 CALL zrot( mvl, v(1,p), 1,
1226 $ v(1,q), 1, cs, conjg(ompq)*sn )
1227 END IF
1228 END IF
1229 cwork(p) = -cwork(q) * ompq
1230*
1231 ELSE
1232* .. have to use modified Gram-Schmidt like transformation
1233 IF( aapp.GT.aaqq ) THEN
1234 CALL zcopy( m, a( 1, p ), 1,
1235 $ cwork(n+1), 1 )
1236 CALL zlascl( 'G', 0, 0, aapp,
1237 $ one,
1238 $ m, 1, cwork(n+1),lda,
1239 $ ierr )
1240 CALL zlascl( 'G', 0, 0, aaqq,
1241 $ one,
1242 $ m, 1, a( 1, q ), lda,
1243 $ ierr )
1244 CALL zaxpy( m, -aapq, cwork(n+1),
1245 $ 1, a( 1, q ), 1 )
1246 CALL zlascl( 'G', 0, 0, one,
1247 $ aaqq,
1248 $ m, 1, a( 1, q ), lda,
1249 $ ierr )
1250 sva( q ) = aaqq*sqrt( max( zero,
1251 $ one-aapq1*aapq1 ) )
1252 mxsinj = max( mxsinj, sfmin )
1253 ELSE
1254 CALL zcopy( m, a( 1, q ), 1,
1255 $ cwork(n+1), 1 )
1256 CALL zlascl( 'G', 0, 0, aaqq,
1257 $ one,
1258 $ m, 1, cwork(n+1),lda,
1259 $ ierr )
1260 CALL zlascl( 'G', 0, 0, aapp,
1261 $ one,
1262 $ m, 1, a( 1, p ), lda,
1263 $ ierr )
1264 CALL zaxpy( m, -conjg(aapq),
1265 $ cwork(n+1), 1, a( 1, p ), 1 )
1266 CALL zlascl( 'G', 0, 0, one,
1267 $ aapp,
1268 $ m, 1, a( 1, p ), lda,
1269 $ ierr )
1270 sva( p ) = aapp*sqrt( max( zero,
1271 $ one-aapq1*aapq1 ) )
1272 mxsinj = max( mxsinj, sfmin )
1273 END IF
1274 END IF
1275* END IF ROTOK THEN ... ELSE
1276*
1277* In the case of cancellation in updating SVA(q), SVA(p)
1278* .. recompute SVA(q), SVA(p)
1279 IF( ( sva( q ) / aaqq )**2.LE.rooteps )
1280 $ THEN
1281 IF( ( aaqq.LT.rootbig ) .AND.
1282 $ ( aaqq.GT.rootsfmin ) ) THEN
1283 sva( q ) = dznrm2( m, a( 1, q ),
1284 $ 1)
1285 ELSE
1286 t = zero
1287 aaqq = one
1288 CALL zlassq( m, a( 1, q ), 1, t,
1289 $ aaqq )
1290 sva( q ) = t*sqrt( aaqq )
1291 END IF
1292 END IF
1293 IF( ( aapp / aapp0 )**2.LE.rooteps ) THEN
1294 IF( ( aapp.LT.rootbig ) .AND.
1295 $ ( aapp.GT.rootsfmin ) ) THEN
1296 aapp = dznrm2( m, a( 1, p ), 1 )
1297 ELSE
1298 t = zero
1299 aapp = one
1300 CALL zlassq( m, a( 1, p ), 1, t,
1301 $ aapp )
1302 aapp = t*sqrt( aapp )
1303 END IF
1304 sva( p ) = aapp
1305 END IF
1306* end of OK rotation
1307 ELSE
1308 notrot = notrot + 1
1309*[RTD] SKIPPED = SKIPPED + 1
1310 pskipped = pskipped + 1
1311 ijblsk = ijblsk + 1
1312 END IF
1313 ELSE
1314 notrot = notrot + 1
1315 pskipped = pskipped + 1
1316 ijblsk = ijblsk + 1
1317 END IF
1318*
1319 IF( ( i.LE.swband ) .AND. ( ijblsk.GE.blskip ) )
1320 $ THEN
1321 sva( p ) = aapp
1322 notrot = 0
1323 GO TO 2011
1324 END IF
1325 IF( ( i.LE.swband ) .AND.
1326 $ ( pskipped.GT.rowskip ) ) THEN
1327 aapp = -aapp
1328 notrot = 0
1329 GO TO 2203
1330 END IF
1331*
1332 2200 CONTINUE
1333* end of the q-loop
1334 2203 CONTINUE
1335*
1336 sva( p ) = aapp
1337*
1338 ELSE
1339*
1340 IF( aapp.EQ.zero )notrot = notrot +
1341 $ min( jgl+kbl-1, n ) - jgl + 1
1342 IF( aapp.LT.zero )notrot = 0
1343*
1344 END IF
1345*
1346 2100 CONTINUE
1347* end of the p-loop
1348 2010 CONTINUE
1349* end of the jbc-loop
1350 2011 CONTINUE
1351*2011 bailed out of the jbc-loop
1352 DO 2012 p = igl, min( igl+kbl-1, n )
1353 sva( p ) = abs( sva( p ) )
1354 2012 CONTINUE
1355***
1356 2000 CONTINUE
1357*2000 :: end of the ibr-loop
1358*
1359* .. update SVA(N)
1360 IF( ( sva( n ).LT.rootbig ) .AND. ( sva( n ).GT.rootsfmin ) )
1361 $ THEN
1362 sva( n ) = dznrm2( m, a( 1, n ), 1 )
1363 ELSE
1364 t = zero
1365 aapp = one
1366 CALL zlassq( m, a( 1, n ), 1, t, aapp )
1367 sva( n ) = t*sqrt( aapp )
1368 END IF
1369*
1370* Additional steering devices
1371*
1372 IF( ( i.LT.swband ) .AND. ( ( mxaapq.LE.roottol ) .OR.
1373 $ ( iswrot.LE.n ) ) )swband = i
1374*
1375 IF( ( i.GT.swband+1 ) .AND. ( mxaapq.LT.sqrt( dble( n ) )*
1376 $ tol ) .AND. ( dble( n )*mxaapq*mxsinj.LT.tol ) ) THEN
1377 GO TO 1994
1378 END IF
1379*
1380 IF( notrot.GE.emptsw )GO TO 1994
1381*
1382 1993 CONTINUE
1383* end i=1:NSWEEP loop
1384*
1385* #:( Reaching this point means that the procedure has not converged.
1386 info = nsweep - 1
1387 GO TO 1995
1388*
1389 1994 CONTINUE
1390* #:) Reaching this point means numerical convergence after the i-th
1391* sweep.
1392*
1393 info = 0
1394* #:) INFO = 0 confirms successful iterations.
1395 1995 CONTINUE
1396*
1397* Sort the singular values and find how many are above
1398* the underflow threshold.
1399*
1400 n2 = 0
1401 n4 = 0
1402 DO 5991 p = 1, n - 1
1403 q = idamax( n-p+1, sva( p ), 1 ) + p - 1
1404 IF( p.NE.q ) THEN
1405 temp1 = sva( p )
1406 sva( p ) = sva( q )
1407 sva( q ) = temp1
1408 CALL zswap( m, a( 1, p ), 1, a( 1, q ), 1 )
1409 IF( rsvec )CALL zswap( mvl, v( 1, p ), 1, v( 1, q ), 1 )
1410 END IF
1411 IF( sva( p ).NE.zero ) THEN
1412 n4 = n4 + 1
1413 IF( sva( p )*skl.GT.sfmin )n2 = n2 + 1
1414 END IF
1415 5991 CONTINUE
1416 IF( sva( n ).NE.zero ) THEN
1417 n4 = n4 + 1
1418 IF( sva( n )*skl.GT.sfmin )n2 = n2 + 1
1419 END IF
1420*
1421* Normalize the left singular vectors.
1422*
1423 IF( lsvec .OR. uctol ) THEN
1424 DO 1998 p = 1, n4
1425* CALL ZDSCAL( M, ONE / SVA( p ), A( 1, p ), 1 )
1426 CALL zlascl( 'G',0,0, sva(p), one, m, 1, a(1,p), m,
1427 $ ierr )
1428 1998 CONTINUE
1429 END IF
1430*
1431* Scale the product of Jacobi rotations.
1432*
1433 IF( rsvec ) THEN
1434 DO 2399 p = 1, n
1435 temp1 = one / dznrm2( mvl, v( 1, p ), 1 )
1436 CALL zdscal( mvl, temp1, v( 1, p ), 1 )
1437 2399 CONTINUE
1438 END IF
1439*
1440* Undo scaling, if necessary (and possible).
1441 IF( ( ( skl.GT.one ) .AND. ( sva( 1 ).LT.( big / skl ) ) )
1442 $ .OR. ( ( skl.LT.one ) .AND. ( sva( max( n2, 1 ) ) .GT.
1443 $ ( sfmin / skl ) ) ) ) THEN
1444 DO 2400 p = 1, n
1445 sva( p ) = skl*sva( p )
1446 2400 CONTINUE
1447 skl = one
1448 END IF
1449*
1450 rwork( 1 ) = skl
1451* The singular values of A are SKL*SVA(1:N). If SKL.NE.ONE
1452* then some of the singular values may overflow or underflow and
1453* the spectrum is given in this factored representation.
1454*
1455 rwork( 2 ) = dble( n4 )
1456* N4 is the number of computed nonzero singular values of A.
1457*
1458 rwork( 3 ) = dble( n2 )
1459* N2 is the number of singular values of A greater than SFMIN.
1460* If N2<N, SVA(N2:N) contains ZEROS and/or denormalized numbers
1461* that may carry some information.
1462*
1463 rwork( 4 ) = dble( i )
1464* i is the index of the last sweep before declaring convergence.
1465*
1466 rwork( 5 ) = mxaapq
1467* MXAAPQ is the largest absolute value of scaled pivots in the
1468* last sweep
1469*
1470 rwork( 6 ) = mxsinj
1471* MXSINJ is the largest absolute value of the sines of Jacobi angles
1472* in the last sweep
1473*
1474 RETURN
1475* ..
1476* .. END OF ZGESVJ
1477* ..
1478 END
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zaxpy(n, za, zx, incx, zy, incy)
ZAXPY
Definition zaxpy.f:88
subroutine zcopy(n, zx, incx, zy, incy)
ZCOPY
Definition zcopy.f:81
subroutine zgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, cwork, lwork, rwork, lrwork, info)
ZGESVJ
Definition zgesvj.f:353
subroutine zgsvj0(jobv, m, n, a, lda, d, sva, mv, v, ldv, eps, sfmin, tol, nsweep, work, lwork, info)
ZGSVJ0 pre-processor for the routine zgesvj.
Definition zgsvj0.f:216
subroutine zgsvj1(jobv, m, n, n1, a, lda, d, sva, mv, v, ldv, eps, sfmin, tol, nsweep, work, lwork, info)
ZGSVJ1 pre-processor for the routine zgesvj, applies Jacobi rotations targeting only particular pivot...
Definition zgsvj1.f:234
subroutine zlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition zlascl.f:142
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:142
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:104
subroutine zlassq(n, x, incx, scale, sumsq)
ZLASSQ updates a sum of squares represented in scaled form.
Definition zlassq.f90:122
subroutine zrot(n, cx, incx, cy, incy, c, s)
ZROT applies a plane rotation with real cosine and complex sine to a pair of complex vectors.
Definition zrot.f:101
subroutine zdscal(n, da, zx, incx)
ZDSCAL
Definition zdscal.f:78
subroutine zswap(n, zx, incx, zy, incy)
ZSWAP
Definition zswap.f:81