LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ zgejsv()

subroutine zgejsv ( character*1 joba,
character*1 jobu,
character*1 jobv,
character*1 jobr,
character*1 jobt,
character*1 jobp,
integer m,
integer n,
complex*16, dimension( lda, * ) a,
integer lda,
double precision, dimension( n ) sva,
complex*16, dimension( ldu, * ) u,
integer ldu,
complex*16, dimension( ldv, * ) v,
integer ldv,
complex*16, dimension( lwork ) cwork,
integer lwork,
double precision, dimension( lrwork ) rwork,
integer lrwork,
integer, dimension( * ) iwork,
integer info )

ZGEJSV

Download ZGEJSV + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZGEJSV computes the singular value decomposition (SVD) of a complex M-by-N
!> matrix [A], where M >= N. The SVD of [A] is written as
!>
!>              [A] = [U] * [SIGMA] * [V]^*,
!>
!> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
!> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and
!> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are
!> the singular values of [A]. The columns of [U] and [V] are the left and
!> the right singular vectors of [A], respectively. The matrices [U] and [V]
!> are computed and stored in the arrays U and V, respectively. The diagonal
!> of [SIGMA] is computed and stored in the array SVA.
!> 
Parameters
[in]JOBA
!>          JOBA is CHARACTER*1
!>         Specifies the level of accuracy:
!>       = 'C': This option works well (high relative accuracy) if A = B * D,
!>              with well-conditioned B and arbitrary diagonal matrix D.
!>              The accuracy cannot be spoiled by COLUMN scaling. The
!>              accuracy of the computed output depends on the condition of
!>              B, and the procedure aims at the best theoretical accuracy.
!>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
!>              bounded by f(M,N)*epsilon* cond(B), independent of D.
!>              The input matrix is preprocessed with the QRF with column
!>              pivoting. This initial preprocessing and preconditioning by
!>              a rank revealing QR factorization is common for all values of
!>              JOBA. Additional actions are specified as follows:
!>       = 'E': Computation as with 'C' with an additional estimate of the
!>              condition number of B. It provides a realistic error bound.
!>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
!>              D1, D2, and well-conditioned matrix C, this option gives
!>              higher accuracy than the 'C' option. If the structure of the
!>              input matrix is not known, and relative accuracy is
!>              desirable, then this option is advisable. The input matrix A
!>              is preprocessed with QR factorization with FULL (row and
!>              column) pivoting.
!>       = 'G': Computation as with 'F' with an additional estimate of the
!>              condition number of B, where A=B*D. If A has heavily weighted
!>              rows, then using this condition number gives too pessimistic
!>              error bound.
!>       = 'A': Small singular values are not well determined by the data 
!>              and are considered as noisy; the matrix is treated as
!>              numerically rank deficient. The error in the computed
!>              singular values is bounded by f(m,n)*epsilon*||A||.
!>              The computed SVD A = U * S * V^* restores A up to
!>              f(m,n)*epsilon*||A||.
!>              This gives the procedure the licence to discard (set to zero)
!>              all singular values below N*epsilon*||A||.
!>       = 'R': Similar as in 'A'. Rank revealing property of the initial
!>              QR factorization is used do reveal (using triangular factor)
!>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
!>              numerical RANK is declared to be r. The SVD is computed with
!>              absolute error bounds, but more accurately than with 'A'.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>         Specifies whether to compute the columns of U:
!>       = 'U': N columns of U are returned in the array U.
!>       = 'F': full set of M left sing. vectors is returned in the array U.
!>       = 'W': U may be used as workspace of length M*N. See the description
!>              of U.
!>       = 'N': U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>         Specifies whether to compute the matrix V:
!>       = 'V': N columns of V are returned in the array V; Jacobi rotations
!>              are not explicitly accumulated.
!>       = 'J': N columns of V are returned in the array V, but they are
!>              computed as the product of Jacobi rotations, if JOBT = 'N'.
!>       = 'W': V may be used as workspace of length N*N. See the description
!>              of V.
!>       = 'N': V is not computed.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>         Specifies the RANGE for the singular values. Issues the licence to
!>         set to zero small positive singular values if they are outside
!>         specified range. If A .NE. 0 is scaled so that the largest singular
!>         value of c*A is around SQRT(BIG), BIG=DLAMCH('O'), then JOBR issues
!>         the licence to kill columns of A whose norm in c*A is less than
!>         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
!>         where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E').
!>       = 'N': Do not kill small columns of c*A. This option assumes that
!>              BLAS and QR factorizations and triangular solvers are
!>              implemented to work in that range. If the condition of A
!>              is greater than BIG, use ZGESVJ.
!>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
!>              (roughly, as described above). This option is recommended.
!>                                             ===========================
!>         For computing the singular values in the FULL range [SFMIN,BIG]
!>         use ZGESVJ.
!> 
[in]JOBT
!>          JOBT is CHARACTER*1
!>         If the matrix is square then the procedure may determine to use
!>         transposed A if A^* seems to be better with respect to convergence.
!>         If the matrix is not square, JOBT is ignored. 
!>         The decision is based on two values of entropy over the adjoint
!>         orbit of A^* * A. See the descriptions of RWORK(6) and RWORK(7).
!>       = 'T': transpose if entropy test indicates possibly faster
!>         convergence of Jacobi process if A^* is taken as input. If A is
!>         replaced with A^*, then the row pivoting is included automatically.
!>       = 'N': do not speculate.
!>         The option 'T' can be used to compute only the singular values, or
!>         the full SVD (U, SIGMA and V). For only one set of singular vectors
!>         (U or V), the caller should provide both U and V, as one of the
!>         matrices is used as workspace if the matrix A is transposed.
!>         The implementer can easily remove this constraint and make the
!>         code more complicated. See the descriptions of U and V.
!>         In general, this option is considered experimental, and 'N'; should
!>         be preferred. This is subject to changes in the future.
!> 
[in]JOBP
!>          JOBP is CHARACTER*1
!>         Issues the licence to introduce structured perturbations to drown
!>         denormalized numbers. This licence should be active if the
!>         denormals are poorly implemented, causing slow computation,
!>         especially in cases of fast convergence (!). For details see [1,2].
!>         For the sake of simplicity, this perturbations are included only
!>         when the full SVD or only the singular values are requested. The
!>         implementer/user can easily add the perturbation for the cases of
!>         computing one set of singular vectors.
!>       = 'P': introduce perturbation
!>       = 'N': do not perturb
!> 
[in]M
!>          M is INTEGER
!>         The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>         The number of columns of the input matrix A. M >= N >= 0.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]SVA
!>          SVA is DOUBLE PRECISION array, dimension (N)
!>          On exit,
!>          - For RWORK(1)/RWORK(2) = ONE: The singular values of A. During
!>            the computation SVA contains Euclidean column norms of the
!>            iterated matrices in the array A.
!>          - For RWORK(1) .NE. RWORK(2): The singular values of A are
!>            (RWORK(1)/RWORK(2)) * SVA(1:N). This factored form is used if
!>            sigma_max(A) overflows or if small singular values have been
!>            saved from underflow by scaling the input matrix A.
!>          - If JOBR='R' then some of the singular values may be returned
!>            as exact zeros obtained by  because they are
!>            below the numerical rank threshold or are denormalized numbers.
!> 
[out]U
!>          U is COMPLEX*16 array, dimension ( LDU, N )
!>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
!>                         the left singular vectors.
!>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
!>                         the left singular vectors, including an ONB
!>                         of the orthogonal complement of the Range(A).
!>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
!>                         then U is used as workspace if the procedure
!>                         replaces A with A^*. In that case, [V] is computed
!>                         in U as left singular vectors of A^* and then
!>                         copied back to the V array. This 'W' option is just
!>                         a reminder to the caller that in this case U is
!>                         reserved as workspace of length N*N.
!>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U,  LDU >= 1.
!>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
!> 
[out]V
!>          V is COMPLEX*16 array, dimension ( LDV, N )
!>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
!>                         the right singular vectors;
!>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
!>                         then V is used as workspace if the procedure
!>                         replaces A with A^*. In that case, [U] is computed
!>                         in V as right singular vectors of A^* and then
!>                         copied back to the U array. This 'W' option is just
!>                         a reminder to the caller that in this case V is
!>                         reserved as workspace of length N*N.
!>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V,  LDV >= 1.
!>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
!> 
[out]CWORK
!>          CWORK is COMPLEX*16 array, dimension (MAX(2,LWORK))
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the required length of
!>          CWORK for the job parameters used in the call.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          Length of CWORK to confirm proper allocation of workspace.
!>          LWORK depends on the job:
!>
!>          1. If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
!>            1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'):
!>               LWORK >= 2*N+1. This is the minimal requirement.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= N + (N+1)*NB. Here NB is the optimal
!>               block size for ZGEQP3 and ZGEQRF.
!>               In general, optimal LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ)).
!>            1.2. .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G'). In this case, LWORK the minimal
!>               requirement is LWORK >= N*N + 2*N.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZGEQRF), LWORK(ZGESVJ),
!>                            N*N+LWORK(ZPOCON)).
!>          2. If SIGMA and the right singular vectors are needed (JOBV = 'V'),
!>             (JOBU = 'N')
!>            2.1   .. no scaled condition estimate requested (JOBE = 'N'):    
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF,
!>               ZUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3), N+LWORK(ZGESVJ),
!>                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)).
!>            2.2 .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.      
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZGELQF,
!>               ZUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3), LWORK(ZPOCON), N+LWORK(ZGESVJ),
!>                       N+LWORK(ZGELQF), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMLQ)).   
!>          3. If SIGMA and the left singular vectors are needed
!>            3.1  .. no scaled condition estimate requested (JOBE = 'N'):
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3), 2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)). 
!>            3.2  .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for ZGEQP3, ZGEQRF, ZUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(ZGEQP3),N+LWORK(ZPOCON),
!>                        2*N+LWORK(ZGEQRF), N+LWORK(ZUNMQR)).
!>          4. If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and 
!>            4.1. if JOBV = 'V'  
!>               the minimal requirement is LWORK >= 5*N+2*N*N. 
!>            4.2. if JOBV = 'J' the minimal requirement is 
!>               LWORK >= 4*N+N*N.
!>            In both cases, the allocated CWORK can accommodate blocked runs
!>            of ZGEQP3, ZGEQRF, ZGELQF, SUNMQR, ZUNMLQ.
!>
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the
!>          minimal length of CWORK for the job parameters used in the call.
!> 
[out]RWORK
!>          RWORK is DOUBLE PRECISION array, dimension (MAX(7,LRWORK))
!>          On exit,
!>          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
!>                    such that SCALE*SVA(1:N) are the computed singular values
!>                    of A. (See the description of SVA().)
!>          RWORK(2) = See the description of RWORK(1).
!>          RWORK(3) = SCONDA is an estimate for the condition number of
!>                    column equilibrated A. (If JOBA = 'E' or 'G')
!>                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
!>                    It is computed using ZPOCON. It holds
!>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
!>                    where R is the triangular factor from the QRF of A.
!>                    However, if R is truncated and the numerical rank is
!>                    determined to be strictly smaller than N, SCONDA is
!>                    returned as -1, thus indicating that the smallest
!>                    singular values might be lost.
!>
!>          If full SVD is needed, the following two condition numbers are
!>          useful for the analysis of the algorithm. They are provided for
!>          a developer/implementer who is familiar with the details of
!>          the method.
!>
!>          RWORK(4) = an estimate of the scaled condition number of the
!>                    triangular factor in the first QR factorization.
!>          RWORK(5) = an estimate of the scaled condition number of the
!>                    triangular factor in the second QR factorization.
!>          The following two parameters are computed if JOBT = 'T'.
!>          They are provided for a developer/implementer who is familiar
!>          with the details of the method.
!>          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
!>                    of diag(A^* * A) / Trace(A^* * A) taken as point in the
!>                    probability simplex.
!>          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit RWORK(1) contains the required length of
!>          RWORK for the job parameters used in the call.
!> 
[in]LRWORK
!>          LRWORK is INTEGER
!>          Length of RWORK to confirm proper allocation of workspace.
!>          LRWORK depends on the job:
!>
!>       1. If only the singular values are requested i.e. if
!>          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N')
!>          then:
!>          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>               then: LRWORK = max( 7, 2 * M ).
!>          1.2. Otherwise, LRWORK  = max( 7,  N ).
!>       2. If singular values with the right singular vectors are requested
!>          i.e. if
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND.
!>          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
!>          then:
!>          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          2.2. Otherwise, LRWORK  = max( 7,  N ).
!>       3. If singular values with the left singular vectors are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          3.2. Otherwise, LRWORK  = max( 7,  N ).
!>       4. If singular values with both the left and the right singular vectors
!>          are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          4.2. Otherwise, LRWORK  = max( 7, N ).
!>
!>          If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and 
!>          the length of RWORK is returned in RWORK(1). 
!> 
[out]IWORK
!>          IWORK is INTEGER array, of dimension at least 4, that further depends 
!>          on the job:
!>
!>          1. If only the singular values are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N.
!>          2. If the singular values and the right singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          3. If the singular values and the left singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          4. If the singular values with both the left and the right singular vectors
!>             are requested, then:      
!>             4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>             4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N.
!>        
!>          On exit,
!>          IWORK(1) = the numerical rank determined after the initial
!>                     QR factorization with pivoting. See the descriptions
!>                     of JOBA and JOBR.
!>          IWORK(2) = the number of the computed nonzero singular values
!>          IWORK(3) = if nonzero, a warning message:
!>                     If IWORK(3) = 1 then some of the column norms of A
!>                     were denormalized floats. The requested high accuracy
!>                     is not warranted by the data.
!>          IWORK(4) = 1 or -1. If IWORK(4) = 1, then the procedure used A^* to
!>                     do the job as specified by the JOB parameters.
!>          If the call to ZGEJSV is a workspace query (indicated by LWORK = -1 or
!>          LRWORK = -1), then on exit IWORK(1) contains the required length of 
!>          IWORK for the job parameters used in the call.
!> 
[out]INFO
!>          INFO is INTEGER
!>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
!>           = 0:  successful exit;
!>           > 0:  ZGEJSV  did not converge in the maximal allowed number
!>                 of sweeps. The computed values may be inaccurate.
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  ZGEJSV implements a preconditioned Jacobi SVD algorithm. It uses ZGEQP3,
!>  ZGEQRF, and ZGELQF as preprocessors and preconditioners. Optionally, an
!>  additional row pivoting can be used as a preprocessor, which in some
!>  cases results in much higher accuracy. An example is matrix A with the
!>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
!>  diagonal matrices and C is well-conditioned matrix. In that case, complete
!>  pivoting in the first QR factorizations provides accuracy dependent on the
!>  condition number of C, and independent of D1, D2. Such higher accuracy is
!>  not completely understood theoretically, but it works well in practice.
!>  Further, if A can be written as A = B*D, with well-conditioned B and some
!>  diagonal D, then the high accuracy is guaranteed, both theoretically and
!>  in software, independent of D. For more details see [1], [2].
!>     The computational range for the singular values can be the full range
!>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
!>  & LAPACK routines called by ZGEJSV are implemented to work in that range.
!>  If that is not the case, then the restriction for safe computation with
!>  the singular values in the range of normalized IEEE numbers is that the
!>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
!>  overflow. This code (ZGEJSV) is best used in this restricted range,
!>  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
!>  returned as zeros. See JOBR for details on this.
!>     Further, this implementation is somewhat slower than the one described
!>  in [1,2] due to replacement of some non-LAPACK components, and because
!>  the choice of some tuning parameters in the iterative part (ZGESVJ) is
!>  left to the implementer on a particular machine.
!>     The rank revealing QR factorization (in this code: ZGEQP3) should be
!>  implemented as in [3]. We have a new version of ZGEQP3 under development
!>  that is more robust than the current one in LAPACK, with a cleaner cut in
!>  rank deficient cases. It will be available in the SIGMA library [4].
!>  If M is much larger than N, it is obvious that the initial QRF with
!>  column pivoting can be preprocessed by the QRF without pivoting. That
!>  well known trick is not used in ZGEJSV because in some cases heavy row
!>  weighting can be treated with complete pivoting. The overhead in cases
!>  M much larger than N is then only due to pivoting, but the benefits in
!>  terms of accuracy have prevailed. The implementer/user can incorporate
!>  this extra QRF step easily. The implementer can also improve data movement
!>  (matrix transpose, matrix copy, matrix transposed copy) - this
!>  implementation of ZGEJSV uses only the simplest, naive data movement.
!> 
Contributor:
Zlatko Drmac, Department of Mathematics, Faculty of Science, University of Zagreb (Zagreb, Croatia); drmac.nosp@m.@mat.nosp@m.h.hr
References:
!>
!> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
!>     LAPACK Working note 169.
!> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
!>     LAPACK Working note 170.
!> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
!>     factorization software - a case study.
!>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
!>     LAPACK Working note 176.
!> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
!>     QSVD, (H,K)-SVD computations.
!>     Department of Mathematics, University of Zagreb, 2008, 2016.
!> 
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac.nosp@m.@mat.nosp@m.h.hr. Thank you.

Definition at line 564 of file zgejsv.f.

567*
568* -- LAPACK computational routine --
569* -- LAPACK is a software package provided by Univ. of Tennessee, --
570* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
571*
572* .. Scalar Arguments ..
573 IMPLICIT NONE
574 INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
575* ..
576* .. Array Arguments ..
577 COMPLEX*16 A( LDA, * ), U( LDU, * ), V( LDV, * ),
578 $ CWORK( LWORK )
579 DOUBLE PRECISION SVA( N ), RWORK( LRWORK )
580 INTEGER IWORK( * )
581 CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
582* ..
583*
584* ===========================================================================
585*
586* .. Local Parameters ..
587 DOUBLE PRECISION ZERO, ONE
588 parameter( zero = 0.0d0, one = 1.0d0 )
589 COMPLEX*16 CZERO, CONE
590 parameter( czero = ( 0.0d0, 0.0d0 ), cone = ( 1.0d0, 0.0d0 ) )
591* ..
592* .. Local Scalars ..
593 COMPLEX*16 CTEMP
594 DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1,
595 $ COND_OK, CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN,
596 $ MAXPRJ, SCALEM, SCONDA, SFMIN, SMALL, TEMP1,
597 $ USCAL1, USCAL2, XSC
598 INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
599 LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY,
600 $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL,
601 $ ROWPIV, RSVEC, TRANSP
602*
603 INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK
604 INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM,
605 $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF
606 INTEGER LWRK_ZGELQF, LWRK_ZGEQP3, LWRK_ZGEQP3N, LWRK_ZGEQRF,
607 $ LWRK_ZGESVJ, LWRK_ZGESVJV, LWRK_ZGESVJU, LWRK_ZUNMLQ,
608 $ LWRK_ZUNMQR, LWRK_ZUNMQRM
609* ..
610* .. Local Arrays
611 COMPLEX*16 CDUMMY(1)
612 DOUBLE PRECISION RDUMMY(1)
613*
614* .. Intrinsic Functions ..
615 INTRINSIC abs, dcmplx, conjg, dlog, max, min, dble, nint, sqrt
616* ..
617* .. External Functions ..
618 DOUBLE PRECISION DLAMCH, DZNRM2
619 INTEGER IDAMAX, IZAMAX
620 LOGICAL LSAME
621 EXTERNAL idamax, izamax, lsame, dlamch, dznrm2
622* ..
623* .. External Subroutines ..
624 EXTERNAL dlassq, zcopy, zgelqf, zgeqp3, zgeqrf, zlacpy,
628*
629 EXTERNAL zgesvj
630* ..
631*
632* Test the input arguments
633*
634 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
635 jracc = lsame( jobv, 'J' )
636 rsvec = lsame( jobv, 'V' ) .OR. jracc
637 rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
638 l2rank = lsame( joba, 'R' )
639 l2aber = lsame( joba, 'A' )
640 errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
641 l2tran = lsame( jobt, 'T' ) .AND. ( m .EQ. n )
642 l2kill = lsame( jobr, 'R' )
643 defr = lsame( jobr, 'N' )
644 l2pert = lsame( jobp, 'P' )
645*
646 lquery = ( lwork .EQ. -1 ) .OR. ( lrwork .EQ. -1 )
647*
648 IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
649 $ errest .OR. lsame( joba, 'C' ) )) THEN
650 info = - 1
651 ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
652 $ ( lsame( jobu, 'W' ) .AND. rsvec .AND. l2tran ) ) ) THEN
653 info = - 2
654 ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
655 $ ( lsame( jobv, 'W' ) .AND. lsvec .AND. l2tran ) ) ) THEN
656 info = - 3
657 ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
658 info = - 4
659 ELSE IF ( .NOT. ( lsame(jobt,'T') .OR.
660 $ lsame(jobt,'N') ) ) THEN
661 info = - 5
662 ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
663 info = - 6
664 ELSE IF ( m .LT. 0 ) THEN
665 info = - 7
666 ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
667 info = - 8
668 ELSE IF ( lda .LT. m ) THEN
669 info = - 10
670 ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
671 info = - 13
672 ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
673 info = - 15
674 ELSE
675* #:)
676 info = 0
677 END IF
678*
679 IF ( info .EQ. 0 ) THEN
680* .. compute the minimal and the optimal workspace lengths
681* [[The expressions for computing the minimal and the optimal
682* values of LCWORK, LRWORK are written with a lot of redundancy and
683* can be simplified. However, this verbose form is useful for
684* maintenance and modifications of the code.]]
685*
686* .. minimal workspace length for ZGEQP3 of an M x N matrix,
687* ZGEQRF of an N x N matrix, ZGELQF of an N x N matrix,
688* ZUNMLQ for computing N x N matrix, ZUNMQR for computing N x N
689* matrix, ZUNMQR for computing M x N matrix, respectively.
690 lwqp3 = n+1
691 lwqrf = max( 1, n )
692 lwlqf = max( 1, n )
693 lwunmlq = max( 1, n )
694 lwunmqr = max( 1, n )
695 lwunmqrm = max( 1, m )
696* .. minimal workspace length for ZPOCON of an N x N matrix
697 lwcon = 2 * n
698* .. minimal workspace length for ZGESVJ of an N x N matrix,
699* without and with explicit accumulation of Jacobi rotations
700 lwsvdj = max( 2 * n, 1 )
701 lwsvdjv = max( 2 * n, 1 )
702* .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ
703 lrwqp3 = 2 * n
704 lrwcon = n
705 lrwsvdj = n
706 IF ( lquery ) THEN
707 CALL zgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
708 $ rdummy, ierr )
709 lwrk_zgeqp3 = int( cdummy(1) )
710 CALL zgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr )
711 lwrk_zgeqrf = int( cdummy(1) )
712 CALL zgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr )
713 lwrk_zgelqf = int( cdummy(1) )
714 END IF
715 minwrk = 2
716 optwrk = 2
717 miniwrk = n
718 IF ( .NOT. (lsvec .OR. rsvec ) ) THEN
719* .. minimal and optimal sizes of the complex workspace if
720* only the singular values are requested
721 IF ( errest ) THEN
722 minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj )
723 ELSE
724 minwrk = max( n+lwqp3, n+lwqrf, lwsvdj )
725 END IF
726 IF ( lquery ) THEN
727 CALL zgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n,
728 $ v,
729 $ ldv, cdummy, -1, rdummy, -1, ierr )
730 lwrk_zgesvj = int( cdummy(1) )
731 IF ( errest ) THEN
732 optwrk = max( n+lwrk_zgeqp3, n**2+lwcon,
733 $ n+lwrk_zgeqrf, lwrk_zgesvj )
734 ELSE
735 optwrk = max( n+lwrk_zgeqp3, n+lwrk_zgeqrf,
736 $ lwrk_zgesvj )
737 END IF
738 END IF
739 IF ( l2tran .OR. rowpiv ) THEN
740 IF ( errest ) THEN
741 minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj )
742 ELSE
743 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
744 END IF
745 ELSE
746 IF ( errest ) THEN
747 minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj )
748 ELSE
749 minrwrk = max( 7, lrwqp3, lrwsvdj )
750 END IF
751 END IF
752 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
753 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
754* .. minimal and optimal sizes of the complex workspace if the
755* singular values and the right singular vectors are requested
756 IF ( errest ) THEN
757 minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,
758 $ 2*n+lwqrf, n+lwsvdj, n+lwunmlq )
759 ELSE
760 minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,
761 $ n+lwsvdj, n+lwunmlq )
762 END IF
763 IF ( lquery ) THEN
764 CALL zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
765 $ lda, cdummy, -1, rdummy, -1, ierr )
766 lwrk_zgesvj = int( cdummy(1) )
767 CALL zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
768 $ v, ldv, cdummy, -1, ierr )
769 lwrk_zunmlq = int( cdummy(1) )
770 IF ( errest ) THEN
771 optwrk = max( n+lwrk_zgeqp3, lwcon, lwrk_zgesvj,
772 $ n+lwrk_zgelqf, 2*n+lwrk_zgeqrf,
773 $ n+lwrk_zgesvj, n+lwrk_zunmlq )
774 ELSE
775 optwrk = max( n+lwrk_zgeqp3, lwrk_zgesvj,n+lwrk_zgelqf,
776 $ 2*n+lwrk_zgeqrf, n+lwrk_zgesvj,
777 $ n+lwrk_zunmlq )
778 END IF
779 END IF
780 IF ( l2tran .OR. rowpiv ) THEN
781 IF ( errest ) THEN
782 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
783 ELSE
784 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
785 END IF
786 ELSE
787 IF ( errest ) THEN
788 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
789 ELSE
790 minrwrk = max( 7, lrwqp3, lrwsvdj )
791 END IF
792 END IF
793 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
794 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
795* .. minimal and optimal sizes of the complex workspace if the
796* singular values and the left singular vectors are requested
797 IF ( errest ) THEN
798 minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm )
799 ELSE
800 minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm )
801 END IF
802 IF ( lquery ) THEN
803 CALL zgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
804 $ lda, cdummy, -1, rdummy, -1, ierr )
805 lwrk_zgesvj = int( cdummy(1) )
806 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
807 $ ldu, cdummy, -1, ierr )
808 lwrk_zunmqrm = int( cdummy(1) )
809 IF ( errest ) THEN
810 optwrk = n + max( lwrk_zgeqp3, lwcon, n+lwrk_zgeqrf,
811 $ lwrk_zgesvj, lwrk_zunmqrm )
812 ELSE
813 optwrk = n + max( lwrk_zgeqp3, n+lwrk_zgeqrf,
814 $ lwrk_zgesvj, lwrk_zunmqrm )
815 END IF
816 END IF
817 IF ( l2tran .OR. rowpiv ) THEN
818 IF ( errest ) THEN
819 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
820 ELSE
821 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
822 END IF
823 ELSE
824 IF ( errest ) THEN
825 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
826 ELSE
827 minrwrk = max( 7, lrwqp3, lrwsvdj )
828 END IF
829 END IF
830 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
831 ELSE
832* .. minimal and optimal sizes of the complex workspace if the
833* full SVD is requested
834 IF ( .NOT. jracc ) THEN
835 IF ( errest ) THEN
836 minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,
837 $ 2*n+lwqrf, 2*n+lwqp3,
838 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
839 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
840 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
841 $ n+n**2+lwsvdj, n+lwunmqrm )
842 ELSE
843 minwrk = max( n+lwqp3, 2*n+n**2+lwcon,
844 $ 2*n+lwqrf, 2*n+lwqp3,
845 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
846 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
847 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
848 $ n+n**2+lwsvdj, n+lwunmqrm )
849 END IF
850 miniwrk = miniwrk + n
851 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
852 ELSE
853 IF ( errest ) THEN
854 minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,
855 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
856 $ n+lwunmqrm )
857 ELSE
858 minwrk = max( n+lwqp3, 2*n+lwqrf,
859 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
860 $ n+lwunmqrm )
861 END IF
862 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
863 END IF
864 IF ( lquery ) THEN
865 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
866 $ ldu, cdummy, -1, ierr )
867 lwrk_zunmqrm = int( cdummy(1) )
868 CALL zunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,
869 $ ldu, cdummy, -1, ierr )
870 lwrk_zunmqr = int( cdummy(1) )
871 IF ( .NOT. jracc ) THEN
872 CALL zgeqp3( n,n, a, lda, iwork, cdummy,cdummy,
873 $ -1,
874 $ rdummy, ierr )
875 lwrk_zgeqp3n = int( cdummy(1) )
876 CALL zgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,
877 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
878 lwrk_zgesvj = int( cdummy(1) )
879 CALL zgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,
880 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
881 lwrk_zgesvju = int( cdummy(1) )
882 CALL zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
883 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
884 lwrk_zgesvjv = int( cdummy(1) )
885 CALL zunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
886 $ v, ldv, cdummy, -1, ierr )
887 lwrk_zunmlq = int( cdummy(1) )
888 IF ( errest ) THEN
889 optwrk = max( n+lwrk_zgeqp3, n+lwcon,
890 $ 2*n+n**2+lwcon, 2*n+lwrk_zgeqrf,
891 $ 2*n+lwrk_zgeqp3n,
892 $ 2*n+n**2+n+lwrk_zgelqf,
893 $ 2*n+n**2+n+n**2+lwcon,
894 $ 2*n+n**2+n+lwrk_zgesvj,
895 $ 2*n+n**2+n+lwrk_zgesvjv,
896 $ 2*n+n**2+n+lwrk_zunmqr,
897 $ 2*n+n**2+n+lwrk_zunmlq,
898 $ n+n**2+lwrk_zgesvju,
899 $ n+lwrk_zunmqrm )
900 ELSE
901 optwrk = max( n+lwrk_zgeqp3,
902 $ 2*n+n**2+lwcon, 2*n+lwrk_zgeqrf,
903 $ 2*n+lwrk_zgeqp3n,
904 $ 2*n+n**2+n+lwrk_zgelqf,
905 $ 2*n+n**2+n+n**2+lwcon,
906 $ 2*n+n**2+n+lwrk_zgesvj,
907 $ 2*n+n**2+n+lwrk_zgesvjv,
908 $ 2*n+n**2+n+lwrk_zunmqr,
909 $ 2*n+n**2+n+lwrk_zunmlq,
910 $ n+n**2+lwrk_zgesvju,
911 $ n+lwrk_zunmqrm )
912 END IF
913 ELSE
914 CALL zgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
915 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
916 lwrk_zgesvjv = int( cdummy(1) )
917 CALL zunmqr( 'L', 'N', n, n, n, cdummy, n,
918 $ cdummy,
919 $ v, ldv, cdummy, -1, ierr )
920 lwrk_zunmqr = int( cdummy(1) )
921 CALL zunmqr( 'L', 'N', m, n, n, a, lda, cdummy,
922 $ u,
923 $ ldu, cdummy, -1, ierr )
924 lwrk_zunmqrm = int( cdummy(1) )
925 IF ( errest ) THEN
926 optwrk = max( n+lwrk_zgeqp3, n+lwcon,
927 $ 2*n+lwrk_zgeqrf, 2*n+n**2,
928 $ 2*n+n**2+lwrk_zgesvjv,
929 $ 2*n+n**2+n+lwrk_zunmqr,n+lwrk_zunmqrm )
930 ELSE
931 optwrk = max( n+lwrk_zgeqp3, 2*n+lwrk_zgeqrf,
932 $ 2*n+n**2, 2*n+n**2+lwrk_zgesvjv,
933 $ 2*n+n**2+n+lwrk_zunmqr,
934 $ n+lwrk_zunmqrm )
935 END IF
936 END IF
937 END IF
938 IF ( l2tran .OR. rowpiv ) THEN
939 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
940 ELSE
941 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
942 END IF
943 END IF
944 minwrk = max( 2, minwrk )
945 optwrk = max( minwrk, optwrk )
946 IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = - 17
947 IF ( lrwork .LT. minrwrk .AND. (.NOT.lquery) ) info = - 19
948 END IF
949*
950 IF ( info .NE. 0 ) THEN
951* #:(
952 CALL xerbla( 'ZGEJSV', - info )
953 RETURN
954 ELSE IF ( lquery ) THEN
955 cwork(1) = optwrk
956 cwork(2) = minwrk
957 rwork(1) = minrwrk
958 iwork(1) = max( 4, miniwrk )
959 RETURN
960 END IF
961*
962* Quick return for void matrix (Y3K safe)
963* #:)
964 IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
965 iwork(1:4) = 0
966 rwork(1:7) = 0
967 RETURN
968 ENDIF
969*
970* Determine whether the matrix U should be M x N or M x M
971*
972 IF ( lsvec ) THEN
973 n1 = n
974 IF ( lsame( jobu, 'F' ) ) n1 = m
975 END IF
976*
977* Set numerical parameters
978*
979*! NOTE: Make sure DLAMCH() does not fail on the target architecture.
980*
981 epsln = dlamch('Epsilon')
982 sfmin = dlamch('SafeMinimum')
983 small = sfmin / epsln
984 big = dlamch('O')
985* BIG = ONE / SFMIN
986*
987* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
988*
989*(!) If necessary, scale SVA() to protect the largest norm from
990* overflow. It is possible that this scaling pushes the smallest
991* column norm left from the underflow threshold (extreme case).
992*
993 scalem = one / sqrt(dble(m)*dble(n))
994 noscal = .true.
995 goscal = .true.
996 DO 1874 p = 1, n
997 aapp = zero
998 aaqq = one
999 CALL zlassq( m, a(1,p), 1, aapp, aaqq )
1000 IF ( aapp .GT. big ) THEN
1001 info = - 9
1002 CALL xerbla( 'ZGEJSV', -info )
1003 RETURN
1004 END IF
1005 aaqq = sqrt(aaqq)
1006 IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
1007 sva(p) = aapp * aaqq
1008 ELSE
1009 noscal = .false.
1010 sva(p) = aapp * ( aaqq * scalem )
1011 IF ( goscal ) THEN
1012 goscal = .false.
1013 CALL dscal( p-1, scalem, sva, 1 )
1014 END IF
1015 END IF
1016 1874 CONTINUE
1017*
1018 IF ( noscal ) scalem = one
1019*
1020 aapp = zero
1021 aaqq = big
1022 DO 4781 p = 1, n
1023 aapp = max( aapp, sva(p) )
1024 IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
1025 4781 CONTINUE
1026*
1027* Quick return for zero M x N matrix
1028* #:)
1029 IF ( aapp .EQ. zero ) THEN
1030 IF ( lsvec ) CALL zlaset( 'G', m, n1, czero, cone, u, ldu )
1031 IF ( rsvec ) CALL zlaset( 'G', n, n, czero, cone, v, ldv )
1032 rwork(1) = one
1033 rwork(2) = one
1034 IF ( errest ) rwork(3) = one
1035 IF ( lsvec .AND. rsvec ) THEN
1036 rwork(4) = one
1037 rwork(5) = one
1038 END IF
1039 IF ( l2tran ) THEN
1040 rwork(6) = zero
1041 rwork(7) = zero
1042 END IF
1043 iwork(1) = 0
1044 iwork(2) = 0
1045 iwork(3) = 0
1046 iwork(4) = -1
1047 RETURN
1048 END IF
1049*
1050* Issue warning if denormalized column norms detected. Override the
1051* high relative accuracy request. Issue licence to kill nonzero columns
1052* (set them to zero) whose norm is less than sigma_max / BIG (roughly).
1053* #:(
1054 warning = 0
1055 IF ( aaqq .LE. sfmin ) THEN
1056 l2rank = .true.
1057 l2kill = .true.
1058 warning = 1
1059 END IF
1060*
1061* Quick return for one-column matrix
1062* #:)
1063 IF ( n .EQ. 1 ) THEN
1064*
1065 IF ( lsvec ) THEN
1066 CALL zlascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
1067 CALL zlacpy( 'A', m, 1, a, lda, u, ldu )
1068* computing all M left singular vectors of the M x 1 matrix
1069 IF ( n1 .NE. n ) THEN
1070 CALL zgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,
1071 $ ierr )
1072 CALL zungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,
1073 $ ierr )
1074 CALL zcopy( m, a(1,1), 1, u(1,1), 1 )
1075 END IF
1076 END IF
1077 IF ( rsvec ) THEN
1078 v(1,1) = cone
1079 END IF
1080 IF ( sva(1) .LT. (big*scalem) ) THEN
1081 sva(1) = sva(1) / scalem
1082 scalem = one
1083 END IF
1084 rwork(1) = one / scalem
1085 rwork(2) = one
1086 IF ( sva(1) .NE. zero ) THEN
1087 iwork(1) = 1
1088 IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
1089 iwork(2) = 1
1090 ELSE
1091 iwork(2) = 0
1092 END IF
1093 ELSE
1094 iwork(1) = 0
1095 iwork(2) = 0
1096 END IF
1097 iwork(3) = 0
1098 iwork(4) = -1
1099 IF ( errest ) rwork(3) = one
1100 IF ( lsvec .AND. rsvec ) THEN
1101 rwork(4) = one
1102 rwork(5) = one
1103 END IF
1104 IF ( l2tran ) THEN
1105 rwork(6) = zero
1106 rwork(7) = zero
1107 END IF
1108 RETURN
1109*
1110 END IF
1111*
1112 transp = .false.
1113*
1114 aatmax = -one
1115 aatmin = big
1116 IF ( rowpiv .OR. l2tran ) THEN
1117*
1118* Compute the row norms, needed to determine row pivoting sequence
1119* (in the case of heavily row weighted A, row pivoting is strongly
1120* advised) and to collect information needed to compare the
1121* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
1122*
1123 IF ( l2tran ) THEN
1124 DO 1950 p = 1, m
1125 xsc = zero
1126 temp1 = one
1127 CALL zlassq( n, a(p,1), lda, xsc, temp1 )
1128* ZLASSQ gets both the ell_2 and the ell_infinity norm
1129* in one pass through the vector
1130 rwork(m+p) = xsc * scalem
1131 rwork(p) = xsc * (scalem*sqrt(temp1))
1132 aatmax = max( aatmax, rwork(p) )
1133 IF (rwork(p) .NE. zero)
1134 $ aatmin = min(aatmin,rwork(p))
1135 1950 CONTINUE
1136 ELSE
1137 DO 1904 p = 1, m
1138 rwork(m+p) = scalem*abs( a(p,izamax(n,a(p,1),lda)) )
1139 aatmax = max( aatmax, rwork(m+p) )
1140 aatmin = min( aatmin, rwork(m+p) )
1141 1904 CONTINUE
1142 END IF
1143*
1144 END IF
1145*
1146* For square matrix A try to determine whether A^* would be better
1147* input for the preconditioned Jacobi SVD, with faster convergence.
1148* The decision is based on an O(N) function of the vector of column
1149* and row norms of A, based on the Shannon entropy. This should give
1150* the right choice in most cases when the difference actually matters.
1151* It may fail and pick the slower converging side.
1152*
1153 entra = zero
1154 entrat = zero
1155 IF ( l2tran ) THEN
1156*
1157 xsc = zero
1158 temp1 = one
1159 CALL dlassq( n, sva, 1, xsc, temp1 )
1160 temp1 = one / temp1
1161*
1162 entra = zero
1163 DO 1113 p = 1, n
1164 big1 = ( ( sva(p) / xsc )**2 ) * temp1
1165 IF ( big1 .NE. zero ) entra = entra + big1 * dlog(big1)
1166 1113 CONTINUE
1167 entra = - entra / dlog(dble(n))
1168*
1169* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
1170* It is derived from the diagonal of A^* * A. Do the same with the
1171* diagonal of A * A^*, compute the entropy of the corresponding
1172* probability distribution. Note that A * A^* and A^* * A have the
1173* same trace.
1174*
1175 entrat = zero
1176 DO 1114 p = 1, m
1177 big1 = ( ( rwork(p) / xsc )**2 ) * temp1
1178 IF ( big1 .NE. zero ) entrat = entrat + big1 * dlog(big1)
1179 1114 CONTINUE
1180 entrat = - entrat / dlog(dble(m))
1181*
1182* Analyze the entropies and decide A or A^*. Smaller entropy
1183* usually means better input for the algorithm.
1184*
1185 transp = ( entrat .LT. entra )
1186*
1187* If A^* is better than A, take the adjoint of A. This is allowed
1188* only for square matrices, M=N.
1189 IF ( transp ) THEN
1190* In an optimal implementation, this trivial transpose
1191* should be replaced with faster transpose.
1192 DO 1115 p = 1, n - 1
1193 a(p,p) = conjg(a(p,p))
1194 DO 1116 q = p + 1, n
1195 ctemp = conjg(a(q,p))
1196 a(q,p) = conjg(a(p,q))
1197 a(p,q) = ctemp
1198 1116 CONTINUE
1199 1115 CONTINUE
1200 a(n,n) = conjg(a(n,n))
1201 DO 1117 p = 1, n
1202 rwork(m+p) = sva(p)
1203 sva(p) = rwork(p)
1204* previously computed row 2-norms are now column 2-norms
1205* of the transposed matrix
1206 1117 CONTINUE
1207 temp1 = aapp
1208 aapp = aatmax
1209 aatmax = temp1
1210 temp1 = aaqq
1211 aaqq = aatmin
1212 aatmin = temp1
1213 kill = lsvec
1214 lsvec = rsvec
1215 rsvec = kill
1216 IF ( lsvec ) n1 = n
1217*
1218 rowpiv = .true.
1219 END IF
1220*
1221 END IF
1222* END IF L2TRAN
1223*
1224* Scale the matrix so that its maximal singular value remains less
1225* than SQRT(BIG) -- the matrix is scaled so that its maximal column
1226* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
1227* SQRT(BIG) instead of BIG is the fact that ZGEJSV uses LAPACK and
1228* BLAS routines that, in some implementations, are not capable of
1229* working in the full interval [SFMIN,BIG] and that they may provoke
1230* overflows in the intermediate results. If the singular values spread
1231* from SFMIN to BIG, then ZGESVJ will compute them. So, in that case,
1232* one should use ZGESVJ instead of ZGEJSV.
1233* >> change in the April 2016 update: allow bigger range, i.e. the
1234* largest column is allowed up to BIG/N and ZGESVJ will do the rest.
1235 big1 = sqrt( big )
1236 temp1 = sqrt( big / dble(n) )
1237* TEMP1 = BIG/DBLE(N)
1238*
1239 CALL dlascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
1240 IF ( aaqq .GT. (aapp * sfmin) ) THEN
1241 aaqq = ( aaqq / aapp ) * temp1
1242 ELSE
1243 aaqq = ( aaqq * temp1 ) / aapp
1244 END IF
1245 temp1 = temp1 * scalem
1246 CALL zlascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
1247*
1248* To undo scaling at the end of this procedure, multiply the
1249* computed singular values with USCAL2 / USCAL1.
1250*
1251 uscal1 = temp1
1252 uscal2 = aapp
1253*
1254 IF ( l2kill ) THEN
1255* L2KILL enforces computation of nonzero singular values in
1256* the restricted range of condition number of the initial A,
1257* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
1258 xsc = sqrt( sfmin )
1259 ELSE
1260 xsc = small
1261*
1262* Now, if the condition number of A is too big,
1263* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
1264* as a precaution measure, the full SVD is computed using ZGESVJ
1265* with accumulated Jacobi rotations. This provides numerically
1266* more robust computation, at the cost of slightly increased run
1267* time. Depending on the concrete implementation of BLAS and LAPACK
1268* (i.e. how they behave in presence of extreme ill-conditioning) the
1269* implementor may decide to remove this switch.
1270 IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
1271 jracc = .true.
1272 END IF
1273*
1274 END IF
1275 IF ( aaqq .LT. xsc ) THEN
1276 DO 700 p = 1, n
1277 IF ( sva(p) .LT. xsc ) THEN
1278 CALL zlaset( 'A', m, 1, czero, czero, a(1,p), lda )
1279 sva(p) = zero
1280 END IF
1281 700 CONTINUE
1282 END IF
1283*
1284* Preconditioning using QR factorization with pivoting
1285*
1286 IF ( rowpiv ) THEN
1287* Optional row permutation (Bjoerck row pivoting):
1288* A result by Cox and Higham shows that the Bjoerck's
1289* row pivoting combined with standard column pivoting
1290* has similar effect as Powell-Reid complete pivoting.
1291* The ell-infinity norms of A are made nonincreasing.
1292 IF ( ( lsvec .AND. rsvec ) .AND. .NOT.( jracc ) ) THEN
1293 iwoff = 2*n
1294 ELSE
1295 iwoff = n
1296 END IF
1297 DO 1952 p = 1, m - 1
1298 q = idamax( m-p+1, rwork(m+p), 1 ) + p - 1
1299 iwork(iwoff+p) = q
1300 IF ( p .NE. q ) THEN
1301 temp1 = rwork(m+p)
1302 rwork(m+p) = rwork(m+q)
1303 rwork(m+q) = temp1
1304 END IF
1305 1952 CONTINUE
1306 CALL zlaswp( n, a, lda, 1, m-1, iwork(iwoff+1), 1 )
1307 END IF
1308*
1309* End of the preparation phase (scaling, optional sorting and
1310* transposing, optional flushing of small columns).
1311*
1312* Preconditioning
1313*
1314* If the full SVD is needed, the right singular vectors are computed
1315* from a matrix equation, and for that we need theoretical analysis
1316* of the Businger-Golub pivoting. So we use ZGEQP3 as the first RR QRF.
1317* In all other cases the first RR QRF can be chosen by other criteria
1318* (eg speed by replacing global with restricted window pivoting, such
1319* as in xGEQPX from TOMS # 782). Good results will be obtained using
1320* xGEQPX with properly (!) chosen numerical parameters.
1321* Any improvement of ZGEQP3 improves overall performance of ZGEJSV.
1322*
1323* A * P1 = Q1 * [ R1^* 0]^*:
1324 DO 1963 p = 1, n
1325* .. all columns are free columns
1326 iwork(p) = 0
1327 1963 CONTINUE
1328 CALL zgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,
1329 $ rwork, ierr )
1330*
1331* The upper triangular matrix R1 from the first QRF is inspected for
1332* rank deficiency and possibilities for deflation, or possible
1333* ill-conditioning. Depending on the user specified flag L2RANK,
1334* the procedure explores possibilities to reduce the numerical
1335* rank by inspecting the computed upper triangular factor. If
1336* L2RANK or L2ABER are up, then ZGEJSV will compute the SVD of
1337* A + dA, where ||dA|| <= f(M,N)*EPSLN.
1338*
1339 nr = 1
1340 IF ( l2aber ) THEN
1341* Standard absolute error bound suffices. All sigma_i with
1342* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
1343* aggressive enforcement of lower numerical rank by introducing a
1344* backward error of the order of N*EPSLN*||A||.
1345 temp1 = sqrt(dble(n))*epsln
1346 DO 3001 p = 2, n
1347 IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
1348 nr = nr + 1
1349 ELSE
1350 GO TO 3002
1351 END IF
1352 3001 CONTINUE
1353 3002 CONTINUE
1354 ELSE IF ( l2rank ) THEN
1355* .. similarly as above, only slightly more gentle (less aggressive).
1356* Sudden drop on the diagonal of R1 is used as the criterion for
1357* close-to-rank-deficient.
1358 temp1 = sqrt(sfmin)
1359 DO 3401 p = 2, n
1360 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
1361 $ ( abs(a(p,p)) .LT. small ) .OR.
1362 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
1363 nr = nr + 1
1364 3401 CONTINUE
1365 3402 CONTINUE
1366*
1367 ELSE
1368* The goal is high relative accuracy. However, if the matrix
1369* has high scaled condition number the relative accuracy is in
1370* general not feasible. Later on, a condition number estimator
1371* will be deployed to estimate the scaled condition number.
1372* Here we just remove the underflowed part of the triangular
1373* factor. This prevents the situation in which the code is
1374* working hard to get the accuracy not warranted by the data.
1375 temp1 = sqrt(sfmin)
1376 DO 3301 p = 2, n
1377 IF ( ( abs(a(p,p)) .LT. small ) .OR.
1378 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
1379 nr = nr + 1
1380 3301 CONTINUE
1381 3302 CONTINUE
1382*
1383 END IF
1384*
1385 almort = .false.
1386 IF ( nr .EQ. n ) THEN
1387 maxprj = one
1388 DO 3051 p = 2, n
1389 temp1 = abs(a(p,p)) / sva(iwork(p))
1390 maxprj = min( maxprj, temp1 )
1391 3051 CONTINUE
1392 IF ( maxprj**2 .GE. one - dble(n)*epsln ) almort = .true.
1393 END IF
1394*
1395*
1396 sconda = - one
1397 condr1 = - one
1398 condr2 = - one
1399*
1400 IF ( errest ) THEN
1401 IF ( n .EQ. nr ) THEN
1402 IF ( rsvec ) THEN
1403* .. V is available as workspace
1404 CALL zlacpy( 'U', n, n, a, lda, v, ldv )
1405 DO 3053 p = 1, n
1406 temp1 = sva(iwork(p))
1407 CALL zdscal( p, one/temp1, v(1,p), 1 )
1408 3053 CONTINUE
1409 IF ( lsvec )THEN
1410 CALL zpocon( 'U', n, v, ldv, one, temp1,
1411 $ cwork(n+1), rwork, ierr )
1412 ELSE
1413 CALL zpocon( 'U', n, v, ldv, one, temp1,
1414 $ cwork, rwork, ierr )
1415 END IF
1416*
1417 ELSE IF ( lsvec ) THEN
1418* .. U is available as workspace
1419 CALL zlacpy( 'U', n, n, a, lda, u, ldu )
1420 DO 3054 p = 1, n
1421 temp1 = sva(iwork(p))
1422 CALL zdscal( p, one/temp1, u(1,p), 1 )
1423 3054 CONTINUE
1424 CALL zpocon( 'U', n, u, ldu, one, temp1,
1425 $ cwork(n+1), rwork, ierr )
1426 ELSE
1427 CALL zlacpy( 'U', n, n, a, lda, cwork, n )
1428*[] CALL ZLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
1429* Change: here index shifted by N to the left, CWORK(1:N)
1430* not needed for SIGMA only computation
1431 DO 3052 p = 1, n
1432 temp1 = sva(iwork(p))
1433*[] CALL ZDSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
1434 CALL zdscal( p, one/temp1, cwork((p-1)*n+1), 1 )
1435 3052 CONTINUE
1436* .. the columns of R are scaled to have unit Euclidean lengths.
1437*[] CALL ZPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
1438*[] $ CWORK(N+N*N+1), RWORK, IERR )
1439 CALL zpocon( 'U', n, cwork, n, one, temp1,
1440 $ cwork(n*n+1), rwork, ierr )
1441*
1442 END IF
1443 IF ( temp1 .NE. zero ) THEN
1444 sconda = one / sqrt(temp1)
1445 ELSE
1446 sconda = - one
1447 END IF
1448* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
1449* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1450 ELSE
1451 sconda = - one
1452 END IF
1453 END IF
1454*
1455 l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1456* If there is no violent scaling, artificial perturbation is not needed.
1457*
1458* Phase 3:
1459*
1460 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1461*
1462* Singular Values only
1463*
1464* .. transpose A(1:NR,1:N)
1465 DO 1946 p = 1, min( n-1, nr )
1466 CALL zcopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1467 CALL zlacgv( n-p+1, a(p,p), 1 )
1468 1946 CONTINUE
1469 IF ( nr .EQ. n ) a(n,n) = conjg(a(n,n))
1470*
1471* The following two DO-loops introduce small relative perturbation
1472* into the strict upper triangle of the lower triangular matrix.
1473* Small entries below the main diagonal are also changed.
1474* This modification is useful if the computing environment does not
1475* provide/allow FLUSH TO ZERO underflow, for it prevents many
1476* annoying denormalized numbers in case of strongly scaled matrices.
1477* The perturbation is structured so that it does not introduce any
1478* new perturbation of the singular values, and it does not destroy
1479* the job done by the preconditioner.
1480* The licence for this perturbation is in the variable L2PERT, which
1481* should be .FALSE. if FLUSH TO ZERO underflow is active.
1482*
1483 IF ( .NOT. almort ) THEN
1484*
1485 IF ( l2pert ) THEN
1486* XSC = SQRT(SMALL)
1487 xsc = epsln / dble(n)
1488 DO 4947 q = 1, nr
1489 ctemp = dcmplx(xsc*abs(a(q,q)),zero)
1490 DO 4949 p = 1, n
1491 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1492 $ .OR. ( p .LT. q ) )
1493* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1494 $ a(p,q) = ctemp
1495 4949 CONTINUE
1496 4947 CONTINUE
1497 ELSE
1498 CALL zlaset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda )
1499 END IF
1500*
1501* .. second preconditioning using the QR factorization
1502*
1503 CALL zgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n,
1504 $ ierr )
1505*
1506* .. and transpose upper to lower triangular
1507 DO 1948 p = 1, nr - 1
1508 CALL zcopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1509 CALL zlacgv( nr-p+1, a(p,p), 1 )
1510 1948 CONTINUE
1511*
1512 END IF
1513*
1514* Row-cyclic Jacobi SVD algorithm with column pivoting
1515*
1516* .. again some perturbation (a "background noise") is added
1517* to drown denormals
1518 IF ( l2pert ) THEN
1519* XSC = SQRT(SMALL)
1520 xsc = epsln / dble(n)
1521 DO 1947 q = 1, nr
1522 ctemp = dcmplx(xsc*abs(a(q,q)),zero)
1523 DO 1949 p = 1, nr
1524 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1525 $ .OR. ( p .LT. q ) )
1526* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1527 $ a(p,q) = ctemp
1528 1949 CONTINUE
1529 1947 CONTINUE
1530 ELSE
1531 CALL zlaset( 'U', nr-1, nr-1, czero, czero, a(1,2),
1532 $ lda )
1533 END IF
1534*
1535* .. and one-sided Jacobi rotations are started on a lower
1536* triangular matrix (plus perturbation which is ignored in
1537* the part which destroys triangular form (confusing?!))
1538*
1539 CALL zgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,
1540 $ n, v, ldv, cwork, lwork, rwork, lrwork, info )
1541*
1542 scalem = rwork(1)
1543 numrank = nint(rwork(2))
1544*
1545*
1546 ELSE IF ( ( rsvec .AND. ( .NOT. lsvec ) .AND. ( .NOT. jracc ) )
1547 $ .OR.
1548 $ ( jracc .AND. ( .NOT. lsvec ) .AND. ( nr .NE. n ) ) ) THEN
1549*
1550* -> Singular Values and Right Singular Vectors <-
1551*
1552 IF ( almort ) THEN
1553*
1554* .. in this case NR equals N
1555 DO 1998 p = 1, nr
1556 CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1557 CALL zlacgv( n-p+1, v(p,p), 1 )
1558 1998 CONTINUE
1559 CALL zlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1560*
1561 CALL zgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,
1562 $ cwork, lwork, rwork, lrwork, info )
1563 scalem = rwork(1)
1564 numrank = nint(rwork(2))
1565
1566 ELSE
1567*
1568* .. two more QR factorizations ( one QRF is not enough, two require
1569* accumulated product of Jacobi rotations, three are perfect )
1570*
1571 CALL zlaset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda )
1572 CALL zgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n,
1573 $ ierr)
1574 CALL zlacpy( 'L', nr, nr, a, lda, v, ldv )
1575 CALL zlaset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1576 CALL zgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1577 $ lwork-2*n, ierr )
1578 DO 8998 p = 1, nr
1579 CALL zcopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1580 CALL zlacgv( nr-p+1, v(p,p), 1 )
1581 8998 CONTINUE
1582 CALL zlaset('U', nr-1, nr-1, czero, czero, v(1,2), ldv)
1583*
1584 CALL zgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,
1585 $ ldu, cwork(n+1), lwork-n, rwork, lrwork, info )
1586 scalem = rwork(1)
1587 numrank = nint(rwork(2))
1588 IF ( nr .LT. n ) THEN
1589 CALL zlaset( 'A',n-nr, nr, czero,czero, v(nr+1,1),
1590 $ ldv )
1591 CALL zlaset( 'A',nr, n-nr, czero,czero, v(1,nr+1),
1592 $ ldv )
1593 CALL zlaset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),
1594 $ ldv )
1595 END IF
1596*
1597 CALL zunmlq( 'L', 'C', n, n, nr, a, lda, cwork,
1598 $ v, ldv, cwork(n+1), lwork-n, ierr )
1599*
1600 END IF
1601* .. permute the rows of V
1602* DO 8991 p = 1, N
1603* CALL ZCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
1604* 8991 CONTINUE
1605* CALL ZLACPY( 'All', N, N, A, LDA, V, LDV )
1606 CALL zlapmr( .false., n, n, v, ldv, iwork )
1607*
1608 IF ( transp ) THEN
1609 CALL zlacpy( 'A', n, n, v, ldv, u, ldu )
1610 END IF
1611*
1612 ELSE IF ( jracc .AND. (.NOT. lsvec) .AND. ( nr.EQ. n ) ) THEN
1613*
1614 CALL zlaset( 'L', n-1,n-1, czero, czero, a(2,1), lda )
1615*
1616 CALL zgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,
1617 $ cwork, lwork, rwork, lrwork, info )
1618 scalem = rwork(1)
1619 numrank = nint(rwork(2))
1620 CALL zlapmr( .false., n, n, v, ldv, iwork )
1621*
1622 ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1623*
1624* .. Singular Values and Left Singular Vectors ..
1625*
1626* .. second preconditioning step to avoid need to accumulate
1627* Jacobi rotations in the Jacobi iterations.
1628 DO 1965 p = 1, nr
1629 CALL zcopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1630 CALL zlacgv( n-p+1, u(p,p), 1 )
1631 1965 CONTINUE
1632 CALL zlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1633*
1634 CALL zgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),
1635 $ lwork-2*n, ierr )
1636*
1637 DO 1967 p = 1, nr - 1
1638 CALL zcopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1639 CALL zlacgv( n-p+1, u(p,p), 1 )
1640 1967 CONTINUE
1641 CALL zlaset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1642*
1643 CALL zgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1644 $ lda, cwork(n+1), lwork-n, rwork, lrwork, info )
1645 scalem = rwork(1)
1646 numrank = nint(rwork(2))
1647*
1648 IF ( nr .LT. m ) THEN
1649 CALL zlaset( 'A', m-nr, nr,czero, czero, u(nr+1,1),
1650 $ ldu )
1651 IF ( nr .LT. n1 ) THEN
1652 CALL zlaset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),
1653 $ ldu )
1654 CALL zlaset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),
1655 $ ldu )
1656 END IF
1657 END IF
1658*
1659 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1660 $ ldu, cwork(n+1), lwork-n, ierr )
1661*
1662 IF ( rowpiv )
1663 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
1664*
1665 DO 1974 p = 1, n1
1666 xsc = one / dznrm2( m, u(1,p), 1 )
1667 CALL zdscal( m, xsc, u(1,p), 1 )
1668 1974 CONTINUE
1669*
1670 IF ( transp ) THEN
1671 CALL zlacpy( 'A', n, n, u, ldu, v, ldv )
1672 END IF
1673*
1674 ELSE
1675*
1676* .. Full SVD ..
1677*
1678 IF ( .NOT. jracc ) THEN
1679*
1680 IF ( .NOT. almort ) THEN
1681*
1682* Second Preconditioning Step (QRF [with pivoting])
1683* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1684* equivalent to an LQF CALL. Since in many libraries the QRF
1685* seems to be better optimized than the LQF, we do explicit
1686* transpose and use the QRF. This is subject to changes in an
1687* optimized implementation of ZGEJSV.
1688*
1689 DO 1968 p = 1, nr
1690 CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1691 CALL zlacgv( n-p+1, v(p,p), 1 )
1692 1968 CONTINUE
1693*
1694* .. the following two loops perturb small entries to avoid
1695* denormals in the second QR factorization, where they are
1696* as good as zeros. This is done to avoid painfully slow
1697* computation with denormals. The relative size of the perturbation
1698* is a parameter that can be changed by the implementer.
1699* This perturbation device will be obsolete on machines with
1700* properly implemented arithmetic.
1701* To switch it off, set L2PERT=.FALSE. To remove it from the
1702* code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1703* The following two loops should be blocked and fused with the
1704* transposed copy above.
1705*
1706 IF ( l2pert ) THEN
1707 xsc = sqrt(small)
1708 DO 2969 q = 1, nr
1709 ctemp = dcmplx(xsc*abs( v(q,q) ),zero)
1710 DO 2968 p = 1, n
1711 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1712 $ .OR. ( p .LT. q ) )
1713* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1714 $ v(p,q) = ctemp
1715 IF ( p .LT. q ) v(p,q) = - v(p,q)
1716 2968 CONTINUE
1717 2969 CONTINUE
1718 ELSE
1719 CALL zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2),
1720 $ ldv )
1721 END IF
1722*
1723* Estimate the row scaled condition number of R1
1724* (If R1 is rectangular, N > NR, then the condition number
1725* of the leading NR x NR submatrix is estimated.)
1726*
1727 CALL zlacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr )
1728 DO 3950 p = 1, nr
1729 temp1 = dznrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1)
1730 CALL zdscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1)
1731 3950 CONTINUE
1732 CALL zpocon('L',nr,cwork(2*n+1),nr,one,temp1,
1733 $ cwork(2*n+nr*nr+1),rwork,ierr)
1734 condr1 = one / sqrt(temp1)
1735* .. here need a second opinion on the condition number
1736* .. then assume worst case scenario
1737* R1 is OK for inverse <=> CONDR1 .LT. DBLE(N)
1738* more conservative <=> CONDR1 .LT. SQRT(DBLE(N))
1739*
1740 cond_ok = sqrt(sqrt(dble(nr)))
1741*[TP] COND_OK is a tuning parameter.
1742*
1743 IF ( condr1 .LT. cond_ok ) THEN
1744* .. the second QRF without pivoting. Note: in an optimized
1745* implementation, this QRF should be implemented as the QRF
1746* of a lower triangular matrix.
1747* R1^* = Q2 * R2
1748 CALL zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1749 $ lwork-2*n, ierr )
1750*
1751 IF ( l2pert ) THEN
1752 xsc = sqrt(small)/epsln
1753 DO 3959 p = 2, nr
1754 DO 3958 q = 1, p - 1
1755 ctemp=dcmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1756 $ zero)
1757 IF ( abs(v(q,p)) .LE. temp1 )
1758* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1759 $ v(q,p) = ctemp
1760 3958 CONTINUE
1761 3959 CONTINUE
1762 END IF
1763*
1764 IF ( nr .NE. n )
1765 $ CALL zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1766* .. save ...
1767*
1768* .. this transposed copy should be better than naive
1769 DO 1969 p = 1, nr - 1
1770 CALL zcopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1771 CALL zlacgv(nr-p+1, v(p,p), 1 )
1772 1969 CONTINUE
1773 v(nr,nr)=conjg(v(nr,nr))
1774*
1775 condr2 = condr1
1776*
1777 ELSE
1778*
1779* .. ill-conditioned case: second QRF with pivoting
1780* Note that windowed pivoting would be equally good
1781* numerically, and more run-time efficient. So, in
1782* an optimal implementation, the next call to ZGEQP3
1783* should be replaced with eg. CALL ZGEQPX (ACM TOMS #782)
1784* with properly (carefully) chosen parameters.
1785*
1786* R1^* * P2 = Q2 * R2
1787 DO 3003 p = 1, nr
1788 iwork(n+p) = 0
1789 3003 CONTINUE
1790 CALL zgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),
1791 $ cwork(2*n+1), lwork-2*n, rwork, ierr )
1792** CALL ZGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
1793** $ LWORK-2*N, IERR )
1794 IF ( l2pert ) THEN
1795 xsc = sqrt(small)
1796 DO 3969 p = 2, nr
1797 DO 3968 q = 1, p - 1
1798 ctemp=dcmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1799 $ zero)
1800 IF ( abs(v(q,p)) .LE. temp1 )
1801* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1802 $ v(q,p) = ctemp
1803 3968 CONTINUE
1804 3969 CONTINUE
1805 END IF
1806*
1807 CALL zlacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1808*
1809 IF ( l2pert ) THEN
1810 xsc = sqrt(small)
1811 DO 8970 p = 2, nr
1812 DO 8971 q = 1, p - 1
1813 ctemp=dcmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1814 $ zero)
1815* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
1816 v(p,q) = - ctemp
1817 8971 CONTINUE
1818 8970 CONTINUE
1819 ELSE
1820 CALL zlaset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv )
1821 END IF
1822* Now, compute R2 = L3 * Q3, the LQ factorization.
1823 CALL zgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),
1824 $ cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1825* .. and estimate the condition number
1826 CALL zlacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr )
1827 DO 4950 p = 1, nr
1828 temp1 = dznrm2( p, cwork(2*n+n*nr+nr+p), nr )
1829 CALL zdscal( p, one/temp1, cwork(2*n+n*nr+nr+p),
1830 $ nr )
1831 4950 CONTINUE
1832 CALL zpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,
1833 $ cwork(2*n+n*nr+nr+nr*nr+1),rwork,ierr )
1834 condr2 = one / sqrt(temp1)
1835*
1836*
1837 IF ( condr2 .GE. cond_ok ) THEN
1838* .. save the Householder vectors used for Q3
1839* (this overwrites the copy of R2, as it will not be
1840* needed in this branch, but it does not overwrite the
1841* Huseholder vectors of Q2.).
1842 CALL zlacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n )
1843* .. and the rest of the information on Q3 is in
1844* WORK(2*N+N*NR+1:2*N+N*NR+N)
1845 END IF
1846*
1847 END IF
1848*
1849 IF ( l2pert ) THEN
1850 xsc = sqrt(small)
1851 DO 4968 q = 2, nr
1852 ctemp = xsc * v(q,q)
1853 DO 4969 p = 1, q - 1
1854* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
1855 v(p,q) = - ctemp
1856 4969 CONTINUE
1857 4968 CONTINUE
1858 ELSE
1859 CALL zlaset( 'U', nr-1,nr-1, czero,czero, v(1,2),
1860 $ ldv )
1861 END IF
1862*
1863* Second preconditioning finished; continue with Jacobi SVD
1864* The input matrix is lower triangular.
1865*
1866* Recover the right singular vectors as solution of a well
1867* conditioned triangular matrix equation.
1868*
1869 IF ( condr1 .LT. cond_ok ) THEN
1870*
1871 CALL zgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,
1872 $ cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,rwork,
1873 $ lrwork, info )
1874 scalem = rwork(1)
1875 numrank = nint(rwork(2))
1876 DO 3970 p = 1, nr
1877 CALL zcopy( nr, v(1,p), 1, u(1,p), 1 )
1878 CALL zdscal( nr, sva(p), v(1,p), 1 )
1879 3970 CONTINUE
1880
1881* .. pick the right matrix equation and solve it
1882*
1883 IF ( nr .EQ. n ) THEN
1884* :)) .. best case, R1 is inverted. The solution of this matrix
1885* equation is Q2*V2 = the product of the Jacobi rotations
1886* used in ZGESVJ, premultiplied with the orthogonal matrix
1887* from the second QR factorization.
1888 CALL ztrsm('L','U','N','N', nr,nr,cone, a,lda, v,
1889 $ ldv)
1890 ELSE
1891* .. R1 is well conditioned, but non-square. Adjoint of R2
1892* is inverted to get the product of the Jacobi rotations
1893* used in ZGESVJ. The Q-factor from the second QR
1894* factorization is then built in explicitly.
1895 CALL ztrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),
1896 $ n,v,ldv)
1897 IF ( nr .LT. n ) THEN
1898 CALL zlaset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1899 CALL zlaset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1900 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),
1901 $ ldv)
1902 END IF
1903 CALL zunmqr('L','N',n,n,nr,cwork(2*n+1),n,
1904 $ cwork(n+1),
1905 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1906 END IF
1907*
1908 ELSE IF ( condr2 .LT. cond_ok ) THEN
1909*
1910* The matrix R2 is inverted. The solution of the matrix equation
1911* is Q3^* * V3 = the product of the Jacobi rotations (applied to
1912* the lower triangular L3 from the LQ factorization of
1913* R2=L3*Q3), pre-multiplied with the transposed Q3.
1914 CALL zgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr,
1915 $ u,
1916 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1917 $ rwork, lrwork, info )
1918 scalem = rwork(1)
1919 numrank = nint(rwork(2))
1920 DO 3870 p = 1, nr
1921 CALL zcopy( nr, v(1,p), 1, u(1,p), 1 )
1922 CALL zdscal( nr, sva(p), u(1,p), 1 )
1923 3870 CONTINUE
1924 CALL ztrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,
1925 $ u,ldu)
1926* .. apply the permutation from the second QR factorization
1927 DO 873 q = 1, nr
1928 DO 872 p = 1, nr
1929 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1930 872 CONTINUE
1931 DO 874 p = 1, nr
1932 u(p,q) = cwork(2*n+n*nr+nr+p)
1933 874 CONTINUE
1934 873 CONTINUE
1935 IF ( nr .LT. n ) THEN
1936 CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),
1937 $ ldv )
1938 CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),
1939 $ ldv )
1940 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),
1941 $ ldv)
1942 END IF
1943 CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1944 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1945 ELSE
1946* Last line of defense.
1947* #:( This is a rather pathological case: no scaled condition
1948* improvement after two pivoted QR factorizations. Other
1949* possibility is that the rank revealing QR factorization
1950* or the condition estimator has failed, or the COND_OK
1951* is set very close to ONE (which is unnecessary). Normally,
1952* this branch should never be executed, but in rare cases of
1953* failure of the RRQR or condition estimator, the last line of
1954* defense ensures that ZGEJSV completes the task.
1955* Compute the full SVD of L3 using ZGESVJ with explicit
1956* accumulation of Jacobi rotations.
1957 CALL zgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr,
1958 $ u,
1959 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1960 $ rwork, lrwork, info )
1961 scalem = rwork(1)
1962 numrank = nint(rwork(2))
1963 IF ( nr .LT. n ) THEN
1964 CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),
1965 $ ldv )
1966 CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),
1967 $ ldv )
1968 CALL zlaset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),
1969 $ ldv)
1970 END IF
1971 CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1972 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1973*
1974 CALL zunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,
1975 $ cwork(2*n+n*nr+1), u, ldu, cwork(2*n+n*nr+nr+1),
1976 $ lwork-2*n-n*nr-nr, ierr )
1977 DO 773 q = 1, nr
1978 DO 772 p = 1, nr
1979 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1980 772 CONTINUE
1981 DO 774 p = 1, nr
1982 u(p,q) = cwork(2*n+n*nr+nr+p)
1983 774 CONTINUE
1984 773 CONTINUE
1985*
1986 END IF
1987*
1988* Permute the rows of V using the (column) permutation from the
1989* first QRF. Also, scale the columns to make them unit in
1990* Euclidean norm. This applies to all cases.
1991*
1992 temp1 = sqrt(dble(n)) * epsln
1993 DO 1972 q = 1, n
1994 DO 972 p = 1, n
1995 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1996 972 CONTINUE
1997 DO 973 p = 1, n
1998 v(p,q) = cwork(2*n+n*nr+nr+p)
1999 973 CONTINUE
2000 xsc = one / dznrm2( n, v(1,q), 1 )
2001 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2002 $ CALL zdscal( n, xsc, v(1,q), 1 )
2003 1972 CONTINUE
2004* At this moment, V contains the right singular vectors of A.
2005* Next, assemble the left singular vector matrix U (M x N).
2006 IF ( nr .LT. m ) THEN
2007 CALL zlaset('A', m-nr, nr, czero, czero, u(nr+1,1),
2008 $ ldu)
2009 IF ( nr .LT. n1 ) THEN
2010 CALL zlaset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
2011 CALL zlaset('A',m-nr,n1-nr,czero,cone,
2012 $ u(nr+1,nr+1),ldu)
2013 END IF
2014 END IF
2015*
2016* The Q matrix from the first QRF is built into the left singular
2017* matrix U. This applies to all cases.
2018*
2019 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2020 $ ldu, cwork(n+1), lwork-n, ierr )
2021
2022* The columns of U are normalized. The cost is O(M*N) flops.
2023 temp1 = sqrt(dble(m)) * epsln
2024 DO 1973 p = 1, nr
2025 xsc = one / dznrm2( m, u(1,p), 1 )
2026 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2027 $ CALL zdscal( m, xsc, u(1,p), 1 )
2028 1973 CONTINUE
2029*
2030* If the initial QRF is computed with row pivoting, the left
2031* singular vectors must be adjusted.
2032*
2033 IF ( rowpiv )
2034 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2035*
2036 ELSE
2037*
2038* .. the initial matrix A has almost orthogonal columns and
2039* the second QRF is not needed
2040*
2041 CALL zlacpy( 'U', n, n, a, lda, cwork(n+1), n )
2042 IF ( l2pert ) THEN
2043 xsc = sqrt(small)
2044 DO 5970 p = 2, n
2045 ctemp = xsc * cwork( n + (p-1)*n + p )
2046 DO 5971 q = 1, p - 1
2047* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
2048* $ ABS(CWORK(N+(p-1)*N+q)) )
2049 cwork(n+(q-1)*n+p)=-ctemp
2050 5971 CONTINUE
2051 5970 CONTINUE
2052 ELSE
2053 CALL zlaset( 'L',n-1,n-1,czero,czero,cwork(n+2),n )
2054 END IF
2055*
2056 CALL zgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,
2057 $ n, u, ldu, cwork(n+n*n+1), lwork-n-n*n, rwork, lrwork,
2058 $ info )
2059*
2060 scalem = rwork(1)
2061 numrank = nint(rwork(2))
2062 DO 6970 p = 1, n
2063 CALL zcopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 )
2064 CALL zdscal( n, sva(p), cwork(n+(p-1)*n+1), 1 )
2065 6970 CONTINUE
2066*
2067 CALL ztrsm( 'L', 'U', 'N', 'N', n, n,
2068 $ cone, a, lda, cwork(n+1), n )
2069 DO 6972 p = 1, n
2070 CALL zcopy( n, cwork(n+p), n, v(iwork(p),1), ldv )
2071 6972 CONTINUE
2072 temp1 = sqrt(dble(n))*epsln
2073 DO 6971 p = 1, n
2074 xsc = one / dznrm2( n, v(1,p), 1 )
2075 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2076 $ CALL zdscal( n, xsc, v(1,p), 1 )
2077 6971 CONTINUE
2078*
2079* Assemble the left singular vector matrix U (M x N).
2080*
2081 IF ( n .LT. m ) THEN
2082 CALL zlaset( 'A', m-n, n, czero, czero, u(n+1,1),
2083 $ ldu )
2084 IF ( n .LT. n1 ) THEN
2085 CALL zlaset('A',n, n1-n, czero, czero, u(1,n+1),
2086 $ ldu)
2087 CALL zlaset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),
2088 $ ldu)
2089 END IF
2090 END IF
2091 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2092 $ ldu, cwork(n+1), lwork-n, ierr )
2093 temp1 = sqrt(dble(m))*epsln
2094 DO 6973 p = 1, n1
2095 xsc = one / dznrm2( m, u(1,p), 1 )
2096 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2097 $ CALL zdscal( m, xsc, u(1,p), 1 )
2098 6973 CONTINUE
2099*
2100 IF ( rowpiv )
2101 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2102*
2103 END IF
2104*
2105* end of the >> almost orthogonal case << in the full SVD
2106*
2107 ELSE
2108*
2109* This branch deploys a preconditioned Jacobi SVD with explicitly
2110* accumulated rotations. It is included as optional, mainly for
2111* experimental purposes. It does perform well, and can also be used.
2112* In this implementation, this branch will be automatically activated
2113* if the condition number sigma_max(A) / sigma_min(A) is predicted
2114* to be greater than the overflow threshold. This is because the
2115* a posteriori computation of the singular vectors assumes robust
2116* implementation of BLAS and some LAPACK procedures, capable of working
2117* in presence of extreme values, e.g. when the singular values spread from
2118* the underflow to the overflow threshold.
2119*
2120 DO 7968 p = 1, nr
2121 CALL zcopy( n-p+1, a(p,p), lda, v(p,p), 1 )
2122 CALL zlacgv( n-p+1, v(p,p), 1 )
2123 7968 CONTINUE
2124*
2125 IF ( l2pert ) THEN
2126 xsc = sqrt(small/epsln)
2127 DO 5969 q = 1, nr
2128 ctemp = dcmplx(xsc*abs( v(q,q) ),zero)
2129 DO 5968 p = 1, n
2130 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
2131 $ .OR. ( p .LT. q ) )
2132* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
2133 $ v(p,q) = ctemp
2134 IF ( p .LT. q ) v(p,q) = - v(p,q)
2135 5968 CONTINUE
2136 5969 CONTINUE
2137 ELSE
2138 CALL zlaset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
2139 END IF
2140
2141 CALL zgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
2142 $ lwork-2*n, ierr )
2143 CALL zlacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n )
2144*
2145 DO 7969 p = 1, nr
2146 CALL zcopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
2147 CALL zlacgv( nr-p+1, u(p,p), 1 )
2148 7969 CONTINUE
2149
2150 IF ( l2pert ) THEN
2151 xsc = sqrt(small/epsln)
2152 DO 9970 q = 2, nr
2153 DO 9971 p = 1, q - 1
2154 ctemp = dcmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),
2155 $ zero)
2156* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
2157 u(p,q) = - ctemp
2158 9971 CONTINUE
2159 9970 CONTINUE
2160 ELSE
2161 CALL zlaset('U', nr-1, nr-1, czero, czero, u(1,2), ldu )
2162 END IF
2163
2164 CALL zgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
2165 $ n, v, ldv, cwork(2*n+n*nr+1), lwork-2*n-n*nr,
2166 $ rwork, lrwork, info )
2167 scalem = rwork(1)
2168 numrank = nint(rwork(2))
2169
2170 IF ( nr .LT. n ) THEN
2171 CALL zlaset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
2172 CALL zlaset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
2173 CALL zlaset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv )
2174 END IF
2175
2176 CALL zunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
2177 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
2178*
2179* Permute the rows of V using the (column) permutation from the
2180* first QRF. Also, scale the columns to make them unit in
2181* Euclidean norm. This applies to all cases.
2182*
2183 temp1 = sqrt(dble(n)) * epsln
2184 DO 7972 q = 1, n
2185 DO 8972 p = 1, n
2186 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
2187 8972 CONTINUE
2188 DO 8973 p = 1, n
2189 v(p,q) = cwork(2*n+n*nr+nr+p)
2190 8973 CONTINUE
2191 xsc = one / dznrm2( n, v(1,q), 1 )
2192 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2193 $ CALL zdscal( n, xsc, v(1,q), 1 )
2194 7972 CONTINUE
2195*
2196* At this moment, V contains the right singular vectors of A.
2197* Next, assemble the left singular vector matrix U (M x N).
2198*
2199 IF ( nr .LT. m ) THEN
2200 CALL zlaset( 'A', m-nr, nr, czero, czero, u(nr+1,1),
2201 $ ldu )
2202 IF ( nr .LT. n1 ) THEN
2203 CALL zlaset('A',nr, n1-nr, czero, czero, u(1,nr+1),
2204 $ ldu)
2205 CALL zlaset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),
2206 $ ldu)
2207 END IF
2208 END IF
2209*
2210 CALL zunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2211 $ ldu, cwork(n+1), lwork-n, ierr )
2212*
2213 IF ( rowpiv )
2214 $ CALL zlaswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2215*
2216*
2217 END IF
2218 IF ( transp ) THEN
2219* .. swap U and V because the procedure worked on A^*
2220 DO 6974 p = 1, n
2221 CALL zswap( n, u(1,p), 1, v(1,p), 1 )
2222 6974 CONTINUE
2223 END IF
2224*
2225 END IF
2226* end of the full SVD
2227*
2228* Undo scaling, if necessary (and possible)
2229*
2230 IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
2231 CALL dlascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n,
2232 $ ierr )
2233 uscal1 = one
2234 uscal2 = one
2235 END IF
2236*
2237 IF ( nr .LT. n ) THEN
2238 DO 3004 p = nr+1, n
2239 sva(p) = zero
2240 3004 CONTINUE
2241 END IF
2242*
2243 rwork(1) = uscal2 * scalem
2244 rwork(2) = uscal1
2245 IF ( errest ) rwork(3) = sconda
2246 IF ( lsvec .AND. rsvec ) THEN
2247 rwork(4) = condr1
2248 rwork(5) = condr2
2249 END IF
2250 IF ( l2tran ) THEN
2251 rwork(6) = entra
2252 rwork(7) = entrat
2253 END IF
2254*
2255 iwork(1) = nr
2256 iwork(2) = numrank
2257 iwork(3) = warning
2258 IF ( transp ) THEN
2259 iwork(4) = 1
2260 ELSE
2261 iwork(4) = -1
2262 END IF
2263
2264*
2265 RETURN
2266* ..
2267* .. END OF ZGEJSV
2268* ..
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zcopy(n, zx, incx, zy, incy)
ZCOPY
Definition zcopy.f:81
subroutine zgelqf(m, n, a, lda, tau, work, lwork, info)
ZGELQF
Definition zgelqf.f:142
subroutine zgeqp3(m, n, a, lda, jpvt, tau, work, lwork, rwork, info)
ZGEQP3
Definition zgeqp3.f:157
subroutine zgeqrf(m, n, a, lda, tau, work, lwork, info)
ZGEQRF
Definition zgeqrf.f:144
subroutine zgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, cwork, lwork, rwork, lrwork, info)
ZGESVJ
Definition zgesvj.f:353
integer function idamax(n, dx, incx)
IDAMAX
Definition idamax.f:71
integer function izamax(n, zx, incx)
IZAMAX
Definition izamax.f:71
subroutine zlacgv(n, x, incx)
ZLACGV conjugates a complex vector.
Definition zlacgv.f:72
subroutine zlacpy(uplo, m, n, a, lda, b, ldb)
ZLACPY copies all or part of one two-dimensional array to another.
Definition zlacpy.f:101
double precision function dlamch(cmach)
DLAMCH
Definition dlamch.f:69
subroutine zlapmr(forwrd, m, n, x, ldx, k)
ZLAPMR rearranges rows of a matrix as specified by a permutation vector.
Definition zlapmr.f:102
subroutine zlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition zlascl.f:142
subroutine dlascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
DLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition dlascl.f:142
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:104
subroutine zlassq(n, x, incx, scale, sumsq)
ZLASSQ updates a sum of squares represented in scaled form.
Definition zlassq.f90:122
subroutine dlassq(n, x, incx, scale, sumsq)
DLASSQ updates a sum of squares represented in scaled form.
Definition dlassq.f90:122
subroutine zlaswp(n, a, lda, k1, k2, ipiv, incx)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
Definition zlaswp.f:113
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real(wp) function dznrm2(n, x, incx)
DZNRM2
Definition dznrm2.f90:90
subroutine zpocon(uplo, n, a, lda, anorm, rcond, work, rwork, info)
ZPOCON
Definition zpocon.f:119
subroutine dscal(n, da, dx, incx)
DSCAL
Definition dscal.f:79
subroutine zdscal(n, da, zx, incx)
ZDSCAL
Definition zdscal.f:78
subroutine zswap(n, zx, incx, zy, incy)
ZSWAP
Definition zswap.f:81
subroutine ztrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
ZTRSM
Definition ztrsm.f:180
subroutine zungqr(m, n, k, a, lda, tau, work, lwork, info)
ZUNGQR
Definition zungqr.f:126
subroutine zunmlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
ZUNMLQ
Definition zunmlq.f:165
subroutine zunmqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
ZUNMQR
Definition zunmqr.f:165
Here is the call graph for this function:
Here is the caller graph for this function: