LAPACK 3.12.1
LAPACK: Linear Algebra PACKage

◆ cgejsv()

subroutine cgejsv ( character*1 joba,
character*1 jobu,
character*1 jobv,
character*1 jobr,
character*1 jobt,
character*1 jobp,
integer m,
integer n,
complex, dimension( lda, * ) a,
integer lda,
real, dimension( n ) sva,
complex, dimension( ldu, * ) u,
integer ldu,
complex, dimension( ldv, * ) v,
integer ldv,
complex, dimension( lwork ) cwork,
integer lwork,
real, dimension( lrwork ) rwork,
integer lrwork,
integer, dimension( * ) iwork,
integer info )

CGEJSV


Purpose:
!>
!> CGEJSV computes the singular value decomposition (SVD) of a complex M-by-N
!> matrix [A], where M >= N. The SVD of [A] is written as
!>
!>              [A] = [U] * [SIGMA] * [V]^*,
!>
!> where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
!> diagonal elements, [U] is an M-by-N (or M-by-M) unitary matrix, and
!> [V] is an N-by-N unitary matrix. The diagonal elements of [SIGMA] are
!> the singular values of [A]. The columns of [U] and [V] are the left and
!> the right singular vectors of [A], respectively. The matrices [U] and [V]
!> are computed and stored in the arrays U and V, respectively. The diagonal
!> of [SIGMA] is computed and stored in the array SVA.
!> 
Parameters
[in]JOBA
!>          JOBA is CHARACTER*1
!>         Specifies the level of accuracy:
!>       = 'C': This option works well (high relative accuracy) if A = B * D,
!>              with well-conditioned B and arbitrary diagonal matrix D.
!>              The accuracy cannot be spoiled by COLUMN scaling. The
!>              accuracy of the computed output depends on the condition of
!>              B, and the procedure aims at the best theoretical accuracy.
!>              The relative error max_{i=1:N}|d sigma_i| / sigma_i is
!>              bounded by f(M,N)*epsilon* cond(B), independent of D.
!>              The input matrix is preprocessed with the QRF with column
!>              pivoting. This initial preprocessing and preconditioning by
!>              a rank revealing QR factorization is common for all values of
!>              JOBA. Additional actions are specified as follows:
!>       = 'E': Computation as with 'C' with an additional estimate of the
!>              condition number of B. It provides a realistic error bound.
!>       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
!>              D1, D2, and well-conditioned matrix C, this option gives
!>              higher accuracy than the 'C' option. If the structure of the
!>              input matrix is not known, and relative accuracy is
!>              desirable, then this option is advisable. The input matrix A
!>              is preprocessed with QR factorization with FULL (row and
!>              column) pivoting.
!>       = 'G': Computation as with 'F' with an additional estimate of the
!>              condition number of B, where A=B*D. If A has heavily weighted
!>              rows, then using this condition number gives an overly
!>              pessimistic error bound.
!>       = 'A': Small singular values are not well determined by the data 
!>              and are considered as noisy; the matrix is treated as
!>              numerically rank deficient. The error in the computed
!>              singular values is bounded by f(m,n)*epsilon*||A||.
!>              The computed SVD A = U * S * V^* restores A up to
!>              f(m,n)*epsilon*||A||.
!>              This gives the procedure the licence to discard (set to zero)
!>              all singular values below N*epsilon*||A||.
!>       = 'R': Similar to 'A'. The rank revealing property of the initial
!>              QR factorization is used to reveal (using the triangular factor)
!>              a gap sigma_{r+1} < epsilon * sigma_r in which case the
!>              numerical RANK is declared to be r. The SVD is computed with
!>              absolute error bounds, but more accurately than with 'A'.
!> 
[in]JOBU
!>          JOBU is CHARACTER*1
!>         Specifies whether to compute the columns of U:
!>       = 'U': N columns of U are returned in the array U.
!>       = 'F': full set of M left sing. vectors is returned in the array U.
!>       = 'W': U may be used as workspace of length M*N. See the description
!>              of U.
!>       = 'N': U is not computed.
!> 
[in]JOBV
!>          JOBV is CHARACTER*1
!>         Specifies whether to compute the matrix V:
!>       = 'V': N columns of V are returned in the array V; Jacobi rotations
!>              are not explicitly accumulated.
!>       = 'J': N columns of V are returned in the array V, but they are
!>              computed as the product of Jacobi rotations, if JOBT = 'N'.
!>       = 'W': V may be used as workspace of length N*N. See the description
!>              of V.
!>       = 'N': V is not computed.
!> 
[in]JOBR
!>          JOBR is CHARACTER*1
!>         Specifies the RANGE for the singular values. Issues the licence to
!>         set to zero small positive singular values if they are outside the
!>         specified range. If A .NE. 0 is scaled so that the largest singular
!>         value of c*A is around SQRT(BIG), BIG=SLAMCH('O'), then JOBR issues
!>         the licence to kill columns of A whose norm in c*A is less than
!>         SQRT(SFMIN) (for JOBR = 'R'), or less than SMALL=SFMIN/EPSLN,
!>         where SFMIN=SLAMCH('S'), EPSLN=SLAMCH('E').
!>       = 'N': Do not kill small columns of c*A. This option assumes that
!>              BLAS and QR factorizations and triangular solvers are
!>              implemented to work in that range. If the condition of A
!>              is greater than BIG, use CGESVJ.
!>       = 'R': RESTRICTED range for sigma(c*A) is [SQRT(SFMIN), SQRT(BIG)]
!>              (roughly, as described above). This option is recommended.
!>                                             ===========================
!>         For computing the singular values in the FULL range [SFMIN,BIG]
!>         use CGESVJ.
!> 
[in]JOBT
!>          JOBT is CHARACTER*1
!>         If the matrix is square, then the procedure may decide to use
!>         transposed A if A^* seems to be better with respect to convergence.
!>         If the matrix is not square, JOBT is ignored.
!>         The decision is based on two values of entropy over the adjoint
!>         orbit of A^* * A. See the descriptions of RWORK(6) and RWORK(7).
!>       = 'T': transpose if entropy test indicates possibly faster
!>         convergence of Jacobi process if A^* is taken as input. If A is
!>         replaced with A^*, then the row pivoting is included automatically.
!>       = 'N': do not speculate.
!>         The option 'T' can be used to compute only the singular values, or
!>         the full SVD (U, SIGMA and V). For only one set of singular vectors
!>         (U or V), the caller should provide both U and V, as one of the
!>         matrices is used as workspace if the matrix A is transposed.
!>         The implementer can easily remove this constraint and make the
!>         code more complicated. See the descriptions of U and V.
!>         In general, this option is considered experimental, and 'N' should
!>         be preferred. This is subject to change in the future.
!> 
[in]JOBP
!>          JOBP is CHARACTER*1
!>         Issues the licence to introduce structured perturbations to drown
!>         denormalized numbers. This licence should be active if the
!>         denormals are poorly implemented, causing slow computation,
!>         especially in cases of fast convergence (!). For details see [1,2].
!>         For the sake of simplicity, these perturbations are included only
!>         when the full SVD or only the singular values are requested. The
!>         implementer/user can easily add the perturbation for the cases of
!>         computing one set of singular vectors.
!>       = 'P': introduce perturbation
!>       = 'N': do not perturb
!> 
[in]M
!>          M is INTEGER
!>         The number of rows of the input matrix A.  M >= 0.
!> 
[in]N
!>          N is INTEGER
!>         The number of columns of the input matrix A. M >= N >= 0.
!> 
[in,out]A
!>          A is COMPLEX array, dimension (LDA,N)
!>          On entry, the M-by-N matrix A.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,M).
!> 
[out]SVA
!>          SVA is REAL array, dimension (N)
!>          On exit,
!>          - For RWORK(1)/RWORK(2) = ONE: The singular values of A. During
!>            the computation SVA contains Euclidean column norms of the
!>            iterated matrices in the array A.
!>          - For RWORK(1) .NE. RWORK(2): The singular values of A are
!>            (RWORK(1)/RWORK(2)) * SVA(1:N). This factored form is used if
!>            sigma_max(A) overflows or if small singular values have been
!>            saved from underflow by scaling the input matrix A.
!>          - If JOBR='R' then some of the singular values may be returned
!>            as exact zeros because they are
!>            below the numerical rank threshold or are denormalized numbers.
!> 
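As an illustration of the factored form just described, here is a minimal
caller-side sketch (not part of LAPACK; the helper name and interface are
hypothetical) that recovers the singular values from SVA and RWORK(1:2):

      ! Hypothetical helper: recover the singular values of A from SVA and
      ! the scaling pair RWORK(1), RWORK(2).  Per the description of SVA,
      ! sigma(i) = (RWORK(1)/RWORK(2)) * SVA(i); the ratio is ONE when no
      ! rescaling of the input was necessary.
      subroutine recover_singular_values( n, sva, rwork, sigma )
         implicit none
         integer, intent(in)  :: n
         real,    intent(in)  :: sva(n), rwork(*)
         real,    intent(out) :: sigma(n)
         sigma(1:n) = ( rwork(1) / rwork(2) ) * sva(1:n)
      end subroutine recover_singular_values
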
[out]U
!>          U is COMPLEX array, dimension ( LDU, N ) or ( LDU, M )
!>          If JOBU = 'U', then U contains on exit the M-by-N matrix of
!>                         the left singular vectors.
!>          If JOBU = 'F', then U contains on exit the M-by-M matrix of
!>                         the left singular vectors, including an ONB
!>                         of the orthogonal complement of the Range(A).
!>          If JOBU = 'W'  .AND. (JOBV = 'V' .AND. JOBT = 'T' .AND. M = N),
!>                         then U is used as workspace if the procedure
!>                         replaces A with A^*. In that case, [V] is computed
!>                         in U as left singular vectors of A^* and then
!>                         copied back to the V array. This 'W' option is just
!>                         a reminder to the caller that in this case U is
!>                         reserved as workspace of length N*N.
!>          If JOBU = 'N'  U is not referenced, unless JOBT='T'.
!> 
[in]LDU
!>          LDU is INTEGER
!>          The leading dimension of the array U,  LDU >= 1.
!>          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
!> 
[out]V
!>          V is COMPLEX array, dimension ( LDV, N )
!>          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
!>                         the right singular vectors;
!>          If JOBV = 'W', AND (JOBU = 'U' AND JOBT = 'T' AND M = N),
!>                         then V is used as workspace if the procedure
!>                         replaces A with A^*. In that case, [U] is computed
!>                         in V as right singular vectors of A^* and then
!>                         copied back to the U array. This 'W' option is just
!>                         a reminder to the caller that in this case V is
!>                         reserved as workspace of length N*N.
!>          If JOBV = 'N'  V is not referenced, unless JOBT='T'.
!> 
[in]LDV
!>          LDV is INTEGER
!>          The leading dimension of the array V,  LDV >= 1.
!>          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
!> 
[out]CWORK
!>          CWORK is COMPLEX array, dimension (MAX(2,LWORK))
!>          If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2)
!>          the minimal length of CWORK for the job parameters used in the call.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          Length of CWORK to confirm proper allocation of workspace.
!>          LWORK depends on the job:
!>
!>          1. If only SIGMA is needed ( JOBU = 'N', JOBV = 'N' ) and
!>            1.1 .. no scaled condition estimate required (JOBA.NE.'E'.AND.JOBA.NE.'G'):
!>               LWORK >= 2*N+1. This is the minimal requirement.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= N + (N+1)*NB. Here NB is the optimal
!>               block size for CGEQP3 and CGEQRF.
!>               In general, optimal LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ)).        
!>            1.2. .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G'). In this case, the minimal
!>               requirement is LWORK >= N*N + 2*N.
!>               ->> For optimal performance (blocked code) the optimal value
!>               is LWORK >= max(N+(N+1)*NB, N*N+2*N)=N**2+2*N.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CGEQRF), LWORK(CGESVJ),
!>                            N*N+LWORK(CPOCON)).
!>          2. If SIGMA and the right singular vectors are needed (JOBV = 'V'),
!>             (JOBU = 'N')
!>            2.1   .. no scaled condition estimate requested (JOBA.NE.'E' .AND. JOBA.NE.'G'):
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CGELQF,
!>               CUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3), N+LWORK(CGESVJ),
!>                       N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)).
!>            2.2 .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.      
!>            -> For optimal performance, 
!>               LWORK >= max(N+(N+1)*NB, 2*N,2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CGELQF,
!>               CUNMLQ. In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3), LWORK(CPOCON), N+LWORK(CGESVJ),
!>                       N+LWORK(CGELQF), 2*N+LWORK(CGEQRF), N+LWORK(CUNMLQ)).   
!>          3. If SIGMA and the left singular vectors are needed
!>            3.1  .. no scaled condition estimate requested (JOBA.NE.'E' .AND. JOBA.NE.'G'):
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3), 2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)). 
!>            3.2  .. an estimate of the scaled condition number of A is
!>               required (JOBA='E', or 'G').
!>            -> the minimal requirement is LWORK >= 3*N.
!>            -> For optimal performance:
!>               if JOBU = 'U' :: LWORK >= max(3*N, N+(N+1)*NB, 2*N+N*NB)=2*N+N*NB,
!>               where NB is the optimal block size for CGEQP3, CGEQRF, CUNMQR.
!>               In general, the optimal length LWORK is computed as
!>               LWORK >= max(N+LWORK(CGEQP3),N+LWORK(CPOCON),
!>                        2*N+LWORK(CGEQRF), N+LWORK(CUNMQR)).
!>
!>          4. If the full SVD is needed: (JOBU = 'U' or JOBU = 'F') and
!>            4.1. if JOBV = 'V'
!>               the minimal requirement is LWORK >= 5*N+2*N*N.
!>            4.2. if JOBV = 'J' the minimal requirement is
!>               LWORK >= 4*N+N*N.
!>            In both cases, the allocated CWORK can accommodate blocked runs
!>            of CGEQP3, CGEQRF, CGELQF, CUNMQR, CUNMLQ.
!> 
!>          If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit CWORK(1) contains the optimal and CWORK(2) contains the
!>          minimal length of CWORK for the job parameters used in the call.        
!> 
[out]RWORK
!>          RWORK is REAL array, dimension (MAX(7,LRWORK))
!>          On exit,
!>          RWORK(1) = Determines the scaling factor SCALE = RWORK(2) / RWORK(1)
!>                    such that SCALE*SVA(1:N) are the computed singular values
!>                    of A. (See the description of SVA().)
!>          RWORK(2) = See the description of RWORK(1).
!>          RWORK(3) = SCONDA is an estimate for the condition number of
!>                    column equilibrated A. (If JOBA = 'E' or 'G')
!>                    SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
!>                    It is computed using CPOCON. It holds
!>                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
!>                    where R is the triangular factor from the QRF of A.
!>                    However, if R is truncated and the numerical rank is
!>                    determined to be strictly smaller than N, SCONDA is
!>                    returned as -1, thus indicating that the smallest
!>                    singular values might be lost.
!>
!>          If full SVD is needed, the following two condition numbers are
!>          useful for the analysis of the algorithm. They are provided for
!>          a developer/implementer who is familiar with the details of
!>          the method.
!>
!>          RWORK(4) = an estimate of the scaled condition number of the
!>                    triangular factor in the first QR factorization.
!>          RWORK(5) = an estimate of the scaled condition number of the
!>                    triangular factor in the second QR factorization.
!>          The following two parameters are computed if JOBT = 'T'.
!>          They are provided for a developer/implementer who is familiar
!>          with the details of the method.
!>          RWORK(6) = the entropy of A^* * A :: this is the Shannon entropy
!>                    of diag(A^* * A) / Trace(A^* * A) taken as a point in the
!>                    probability simplex.
!>          RWORK(7) = the entropy of A * A^*. (See the description of RWORK(6).)
!>          If the call to CGEJSV is a workspace query (indicated by LWORK=-1 or
!>          LRWORK=-1), then on exit RWORK(1) contains the required length of
!>          RWORK for the job parameters used in the call.
!> 
[in]LRWORK
!>          LRWORK is INTEGER
!>          Length of RWORK to confirm proper allocation of workspace.
!>          LRWORK depends on the job:
!>
!>       1. If only the singular values are requested i.e. if
!>          LSAME(JOBU,'N') .AND. LSAME(JOBV,'N')
!>          then:
!>          1.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>               then: LRWORK = max( 7, 2 * M ).
!>          1.2. Otherwise, LRWORK  = max( 7,  N ).
!>       2. If singular values with the right singular vectors are requested
!>          i.e. if
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J')) .AND.
!>          .NOT.(LSAME(JOBU,'U').OR.LSAME(JOBU,'F'))
!>          then:
!>          2.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          2.2. Otherwise, LRWORK  = max( 7,  N ).
!>       3. If singular values with the left singular vectors are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          .NOT.(LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          3.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          3.2. Otherwise, LRWORK  = max( 7,  N ).
!>       4. If singular values with both the left and the right singular vectors
!>          are requested, i.e. if
!>          (LSAME(JOBU,'U').OR.LSAME(JOBU,'F')) .AND.
!>          (LSAME(JOBV,'V').OR.LSAME(JOBV,'J'))
!>          then:
!>          4.1. If LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G'),
!>          then LRWORK = max( 7, 2 * M ).
!>          4.2. Otherwise, LRWORK  = max( 7, N ).
!> 
!>          If, on entry, LRWORK = -1 or LWORK=-1, a workspace query is assumed and 
!>          the length of RWORK is returned in RWORK(1). 
!> 
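All four cases above reduce to the same test on JOBT and JOBA. A minimal
caller-side sketch (hypothetical, not part of LAPACK; it assumes upper-case job
characters, whereas LAPACK itself uses the case-insensitive LSAME):

      ! Hypothetical helper: minimal LRWORK implied by rules 1-4 above.
      integer function cgejsv_lrwork_min( joba, jobt, m, n )
         implicit none
         character(len=1), intent(in) :: joba, jobt
         integer,          intent(in) :: m, n
         if ( jobt == 'T' .or. joba == 'F' .or. joba == 'G' ) then
            cgejsv_lrwork_min = max( 7, 2*m )
         else
            cgejsv_lrwork_min = max( 7, n )
         end if
      end function cgejsv_lrwork_min

In practice the workspace query (LRWORK = -1 or LWORK = -1) is the safer way to
obtain this length.
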
[out]IWORK
!>          IWORK is INTEGER array, of dimension at least 4, that further depends
!>          on the job:
!> 
!>          1. If only the singular values are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N.
!>          2. If the singular values and the right singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          3. If the singular values and the left singular vectors are requested then:
!>             If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>             then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>          4. If the singular values with both the left and the right singular vectors
!>             are requested, then:      
!>             4.1. If LSAME(JOBV,'J') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is N+M; otherwise the length of IWORK is N. 
!>             4.2. If LSAME(JOBV,'V') the length of IWORK is determined as follows:
!>                  If ( LSAME(JOBT,'T') .OR. LSAME(JOBA,'F') .OR. LSAME(JOBA,'G') ) 
!>                  then the length of IWORK is 2*N+M; otherwise the length of IWORK is 2*N.
!>        
!>          On exit,
!>          IWORK(1) = the numerical rank determined after the initial
!>                     QR factorization with pivoting. See the descriptions
!>                     of JOBA and JOBR.
!>          IWORK(2) = the number of the computed nonzero singular values
!>          IWORK(3) = if nonzero, a warning message:
!>                     If IWORK(3) = 1 then some of the column norms of A
!>                     were denormalized floats. The requested high accuracy
!>                     is not warranted by the data.
!>          IWORK(4) = 1 or -1. If IWORK(4) = 1, then the procedure used A^* to
!>                     do the job as specified by the JOB parameters.
!>          If the call to CGEJSV is a workspace query (indicated by LWORK = -1 and 
!>          LRWORK = -1), then on exit IWORK(1) contains the required length of 
!>          IWORK for the job parameters used in the call.
!> 
[out]INFO
!>          INFO is INTEGER
!>           < 0:  if INFO = -i, then the i-th argument had an illegal value.
!>           = 0:  successful exit;
!>           > 0:  CGEJSV  did not converge in the maximal allowed number
!>                 of sweeps. The computed values may be inaccurate.
!> 
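The following self-contained sketch (not part of LAPACK; the matrix data, job
options and all local names are chosen purely for illustration) shows a
workspace query followed by a full-SVD call with the parameters described
above:

      ! Illustrative driver: workspace query, allocation, full SVD, and a
      ! check of the exit information.
      program cgejsv_example
         implicit none
         integer, parameter :: m = 5, n = 3
         complex :: a(m,n), u(m,n), v(n,n), cquery(2)
         real    :: sva(n), sigma(n), rquery(7)
         integer :: iquery(4)
         complex, allocatable :: cwork(:)
         real,    allocatable :: rwork(:)
         integer, allocatable :: iwork(:)
         integer :: lwork, lrwork, liwork, info, j, k

         ! Arbitrary example data.
         do j = 1, n
            do k = 1, m
               a(k,j) = cmplx( real(k+2*j), real(k*j) )
            end do
         end do

         ! Workspace query (LWORK = -1, LRWORK = -1): the needed lengths come
         ! back in CWORK(1:2), RWORK(1) and IWORK(1).
         call cgejsv( 'C', 'U', 'V', 'R', 'N', 'N', m, n, a, m, sva,      &
                      u, m, v, n, cquery, -1, rquery, -1, iquery, info )
         lwork  = int( cquery(1) )
         lrwork = int( rquery(1) )
         liwork = iquery(1)
         allocate( cwork(max(2,lwork)), rwork(max(7,lrwork)),             &
                   iwork(max(4,liwork)) )

         ! Compute SIGMA, N left singular vectors and N right singular vectors.
         call cgejsv( 'C', 'U', 'V', 'R', 'N', 'N', m, n, a, m, sva,      &
                      u, m, v, n, cwork, lwork, rwork, lrwork, iwork, info )

         if ( info == 0 ) then
            ! Per the description of SVA, the singular values are
            ! (RWORK(1)/RWORK(2)) * SVA(1:N).
            sigma = ( rwork(1) / rwork(2) ) * sva
            write(*,*) 'numerical rank          =', iwork(1)
            write(*,*) 'nonzero singular values =', iwork(2)
            write(*,*) 'sigma =', sigma
         else if ( info > 0 ) then
            write(*,*) 'CGEJSV did not converge; results may be inaccurate'
         else
            write(*,*) 'argument', -info, 'had an illegal value'
         end if
      end program cgejsv_example
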
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>  CGEJSV implements a preconditioned Jacobi SVD algorithm. It uses CGEQP3,
!>  CGEQRF, and CGELQF as preprocessors and preconditioners. Optionally, an
!>  additional row pivoting can be used as a preprocessor, which in some
!>  cases results in much higher accuracy. An example is matrix A with the
!>  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
!>  diagonal matrices and C is a well-conditioned matrix. In that case, complete
!>  pivoting in the first QR factorization provides accuracy dependent on the
!>  condition number of C, and independent of D1, D2. Such higher accuracy is
!>  not completely understood theoretically, but it works well in practice.
!>  Further, if A can be written as A = B*D, with well-conditioned B and some
!>  diagonal D, then the high accuracy is guaranteed, both theoretically and
!>  in software, independent of D. For more details see [1], [2].
!>     The computational range for the singular values can be the full range
!>  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
!>  & LAPACK routines called by CGEJSV are implemented to work in that range.
!>  If that is not the case, then the restriction for safe computation with
!>  the singular values in the range of normalized IEEE numbers is that the
!>  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
!>  overflow. This code (CGEJSV) is best used in this restricted range,
!>  meaning that singular values of magnitude below ||A||_2 / SLAMCH('O') are
!>  returned as zeros. See JOBR for details on this.
!>     Further, this implementation is somewhat slower than the one described
!>  in [1,2] due to replacement of some non-LAPACK components, and because
!>  the choice of some tuning parameters in the iterative part (CGESVJ) is
!>  left to the implementer on a particular machine.
!>     The rank revealing QR factorization (in this code: CGEQP3) should be
!>  implemented as in [3]. We have a new version of CGEQP3 under development
!>  that is more robust than the current one in LAPACK, with a cleaner cut in
!>  rank deficient cases. It will be available in the SIGMA library [4].
!>  If M is much larger than N, it is obvious that the initial QRF with
!>  column pivoting can be preprocessed by the QRF without pivoting. That
!>  well known trick is not used in CGEJSV because in some cases heavy row
!>  weighting can be treated with complete pivoting. The overhead in cases
!>  M much larger than N is then only due to pivoting, but the benefits in
!>  terms of accuracy have prevailed. The implementer/user can incorporate
!>  this extra QRF step easily. The implementer can also improve data movement
!>  (matrix transpose, matrix copy, matrix transposed copy) - this
!>  implementation of CGEJSV uses only the simplest, naive data movement.
!> 
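As a concrete illustration of the extra QRF step mentioned above for M much
larger than N, here is a sketch (not part of LAPACK; matrix data, workspace
sizes and all names are chosen for illustration, and production code should
obtain the workspace lengths from workspace queries):

      ! Illustrative preprocessing for a tall-and-skinny matrix:
      ! A = Q*[R;0] by CGEQRF, then the Jacobi SVD of the N-by-N factor R.
      program tall_skinny_jsv
         implicit none
         integer, parameter :: m = 1000, n = 50
         integer, parameter :: lwork = 10*n*n + 10*n, lrwork = max( 7, 2*n )
         complex :: a(m,n), r(n,n), u(m,n), v(n,n), tau(n), cwork(lwork)
         real    :: sva(n), rwork(lrwork)
         integer :: iwork(2*n), info, j, k

         ! Arbitrary example data.
         do j = 1, n
            do k = 1, m
               a(k,j) = cmplx( real(k+j), real(k-j) )
            end do
         end do

         ! 1) QR factorization without pivoting:  A = Q * [ R ; 0 ].
         call cgeqrf( m, n, a, m, tau, cwork, lwork, info )

         ! 2) Copy the N-by-N triangular factor R into its own array.
         r = cmplx( 0.0, 0.0 )
         call clacpy( 'U', n, n, a, m, r, n )

         ! 3) Preconditioned Jacobi SVD of R:  R = Ur * SIGMA * V^*.
         call cgejsv( 'C', 'U', 'V', 'R', 'N', 'N', n, n, r, n, sva,      &
                      u, m, v, n, cwork, lwork, rwork, lrwork, iwork, info )

         ! 4) Left singular vectors of A:  U = Q * [ Ur ; 0 ].
         u(n+1:m,1:n) = cmplx( 0.0, 0.0 )
         call cunmqr( 'L', 'N', m, n, n, a, m, tau, u, m, cwork, lwork,   &
                      info )
      end program tall_skinny_jsv

Since A = Q*[R;0], the right singular vectors of R are those of A, and the left
singular vectors of A are obtained by applying Q to [Ur;0].
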
Contributor:
Zlatko Drmac (Zagreb, Croatia)
References:
!>
!> [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1322-1342.
!>     LAPACK Working note 169.
!> [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
!>     SIAM J. Matrix Anal. Appl. Vol. 35, No. 2 (2008), pp. 1343-1362.
!>     LAPACK Working note 170.
!> [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
!>     factorization software - a case study.
!>     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
!>     LAPACK Working note 176.
!> [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
!>     QSVD, (H,K)-SVD computations.
!>     Department of Mathematics, University of Zagreb, 2008, 2016.
!> 
Bugs, examples and comments:
Please report all bugs and send interesting examples and/or comments to drmac@math.hr. Thank you.

Definition at line 563 of file cgejsv.f.

566*
567* -- LAPACK computational routine --
568* -- LAPACK is a software package provided by Univ. of Tennessee, --
569* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
570*
571* .. Scalar Arguments ..
572 IMPLICIT NONE
573 INTEGER INFO, LDA, LDU, LDV, LWORK, LRWORK, M, N
574* ..
575* .. Array Arguments ..
576 COMPLEX A( LDA, * ), U( LDU, * ), V( LDV, * ), CWORK( LWORK )
577 REAL SVA( N ), RWORK( LRWORK )
578 INTEGER IWORK( * )
579 CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
580* ..
581*
582* ===========================================================================
583*
584* .. Local Parameters ..
585 REAL ZERO, ONE
586 parameter( zero = 0.0e0, one = 1.0e0 )
587 COMPLEX CZERO, CONE
588 parameter( czero = ( 0.0e0, 0.0e0 ), cone = ( 1.0e0, 0.0e0 ) )
589* ..
590* .. Local Scalars ..
591 COMPLEX CTEMP
592 REAL AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
593 $ CONDR1, CONDR2, ENTRA, ENTRAT, EPSLN, MAXPRJ, SCALEM,
594 $ SCONDA, SFMIN, SMALL, TEMP1, USCAL1, USCAL2, XSC
595 INTEGER IERR, N1, NR, NUMRANK, p, q, WARNING
596 LOGICAL ALMORT, DEFR, ERREST, GOSCAL, JRACC, KILL, LQUERY,
597 $ LSVEC, L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN, NOSCAL,
598 $ ROWPIV, RSVEC, TRANSP
599*
600 INTEGER OPTWRK, MINWRK, MINRWRK, MINIWRK
601 INTEGER LWCON, LWLQF, LWQP3, LWQRF, LWUNMLQ, LWUNMQR, LWUNMQRM,
602 $ LWSVDJ, LWSVDJV, LRWQP3, LRWCON, LRWSVDJ, IWOFF
603 INTEGER LWRK_CGELQF, LWRK_CGEQP3, LWRK_CGEQP3N, LWRK_CGEQRF,
604 $ LWRK_CGESVJ, LWRK_CGESVJV, LWRK_CGESVJU, LWRK_CUNMLQ,
605 $ LWRK_CUNMQR, LWRK_CUNMQRM
606* ..
607* .. Local Arrays
608 COMPLEX CDUMMY(1)
609 REAL RDUMMY(1)
610*
611* .. Intrinsic Functions ..
612 INTRINSIC abs, cmplx, conjg, alog, max, min, real, nint, sqrt
613* ..
614* .. External Functions ..
615 REAL SLAMCH, SCNRM2
616 INTEGER ISAMAX, ICAMAX
617 LOGICAL LSAME
618 EXTERNAL isamax, icamax, lsame, slamch, scnrm2
619* ..
620* .. External Subroutines ..
621 EXTERNAL slassq, ccopy, cgelqf, cgeqp3, cgeqrf, clacpy,
625*
626 EXTERNAL cgesvj
627* ..
628*
629* Test the input arguments
630*
631 lsvec = lsame( jobu, 'U' ) .OR. lsame( jobu, 'F' )
632 jracc = lsame( jobv, 'J' )
633 rsvec = lsame( jobv, 'V' ) .OR. jracc
634 rowpiv = lsame( joba, 'F' ) .OR. lsame( joba, 'G' )
635 l2rank = lsame( joba, 'R' )
636 l2aber = lsame( joba, 'A' )
637 errest = lsame( joba, 'E' ) .OR. lsame( joba, 'G' )
638 l2tran = lsame( jobt, 'T' ) .AND. ( m .EQ. n )
639 l2kill = lsame( jobr, 'R' )
640 defr = lsame( jobr, 'N' )
641 l2pert = lsame( jobp, 'P' )
642*
643 lquery = ( lwork .EQ. -1 ) .OR. ( lrwork .EQ. -1 )
644*
645 IF ( .NOT.(rowpiv .OR. l2rank .OR. l2aber .OR.
646 $ errest .OR. lsame( joba, 'C' ) )) THEN
647 info = - 1
648 ELSE IF ( .NOT.( lsvec .OR. lsame( jobu, 'N' ) .OR.
649 $ ( lsame( jobu, 'W' ) .AND. rsvec .AND. l2tran ) ) ) THEN
650 info = - 2
651 ELSE IF ( .NOT.( rsvec .OR. lsame( jobv, 'N' ) .OR.
652 $ ( lsame( jobv, 'W' ) .AND. lsvec .AND. l2tran ) ) ) THEN
653 info = - 3
654 ELSE IF ( .NOT. ( l2kill .OR. defr ) ) THEN
655 info = - 4
656 ELSE IF ( .NOT. ( lsame(jobt,'T') .OR.
657 $ lsame(jobt,'N') ) ) THEN
658 info = - 5
659 ELSE IF ( .NOT. ( l2pert .OR. lsame( jobp, 'N' ) ) ) THEN
660 info = - 6
661 ELSE IF ( m .LT. 0 ) THEN
662 info = - 7
663 ELSE IF ( ( n .LT. 0 ) .OR. ( n .GT. m ) ) THEN
664 info = - 8
665 ELSE IF ( lda .LT. m ) THEN
666 info = - 10
667 ELSE IF ( lsvec .AND. ( ldu .LT. m ) ) THEN
668 info = - 13
669 ELSE IF ( rsvec .AND. ( ldv .LT. n ) ) THEN
670 info = - 15
671 ELSE
672* #:)
673 info = 0
674 END IF
675*
676 IF ( info .EQ. 0 ) THEN
677* .. compute the minimal and the optimal workspace lengths
678* [[The expressions for computing the minimal and the optimal
679* values of LCWORK, LRWORK are written with a lot of redundancy and
680* can be simplified. However, this verbose form is useful for
681* maintenance and modifications of the code.]]
682*
683* .. minimal workspace length for CGEQP3 of an M x N matrix,
684* CGEQRF of an N x N matrix, CGELQF of an N x N matrix,
685* CUNMLQ for computing N x N matrix, CUNMQR for computing N x N
686* matrix, CUNMQR for computing M x N matrix, respectively.
687 lwqp3 = n+1
688 lwqrf = max( 1, n )
689 lwlqf = max( 1, n )
690 lwunmlq = max( 1, n )
691 lwunmqr = max( 1, n )
692 lwunmqrm = max( 1, m )
693* .. minimal workspace length for CPOCON of an N x N matrix
694 lwcon = 2 * n
695* .. minimal workspace length for CGESVJ of an N x N matrix,
696* without and with explicit accumulation of Jacobi rotations
697 lwsvdj = max( 2 * n, 1 )
698 lwsvdjv = max( 2 * n, 1 )
699* .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ
700 lrwqp3 = 2 * n
701 lrwcon = n
702 lrwsvdj = n
703 IF ( lquery ) THEN
704 CALL cgeqp3( m, n, a, lda, iwork, cdummy, cdummy, -1,
705 $ rdummy, ierr )
706 lwrk_cgeqp3 = int( cdummy(1) )
707 CALL cgeqrf( n, n, a, lda, cdummy, cdummy,-1, ierr )
708 lwrk_cgeqrf = int( cdummy(1) )
709 CALL cgelqf( n, n, a, lda, cdummy, cdummy,-1, ierr )
710 lwrk_cgelqf = int( cdummy(1) )
711 END IF
712 minwrk = 2
713 optwrk = 2
714 miniwrk = n
715 IF ( .NOT. (lsvec .OR. rsvec ) ) THEN
716* .. minimal and optimal sizes of the complex workspace if
717* only the singular values are requested
718 IF ( errest ) THEN
719 minwrk = max( n+lwqp3, n**2+lwcon, n+lwqrf, lwsvdj )
720 ELSE
721 minwrk = max( n+lwqp3, n+lwqrf, lwsvdj )
722 END IF
723 IF ( lquery ) THEN
724 CALL cgesvj( 'L', 'N', 'N', n, n, a, lda, sva, n,
725 $ v,
726 $ ldv, cdummy, -1, rdummy, -1, ierr )
727 lwrk_cgesvj = int( cdummy(1) )
728 IF ( errest ) THEN
729 optwrk = max( n+lwrk_cgeqp3, n**2+lwcon,
730 $ n+lwrk_cgeqrf, lwrk_cgesvj )
731 ELSE
732 optwrk = max( n+lwrk_cgeqp3, n+lwrk_cgeqrf,
733 $ lwrk_cgesvj )
734 END IF
735 END IF
736 IF ( l2tran .OR. rowpiv ) THEN
737 IF ( errest ) THEN
738 minrwrk = max( 7, 2*m, lrwqp3, lrwcon, lrwsvdj )
739 ELSE
740 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
741 END IF
742 ELSE
743 IF ( errest ) THEN
744 minrwrk = max( 7, lrwqp3, lrwcon, lrwsvdj )
745 ELSE
746 minrwrk = max( 7, lrwqp3, lrwsvdj )
747 END IF
748 END IF
749 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
750 ELSE IF ( rsvec .AND. (.NOT.lsvec) ) THEN
751* .. minimal and optimal sizes of the complex workspace if the
752* singular values and the right singular vectors are requested
753 IF ( errest ) THEN
754 minwrk = max( n+lwqp3, lwcon, lwsvdj, n+lwlqf,
755 $ 2*n+lwqrf, n+lwsvdj, n+lwunmlq )
756 ELSE
757 minwrk = max( n+lwqp3, lwsvdj, n+lwlqf, 2*n+lwqrf,
758 $ n+lwsvdj, n+lwunmlq )
759 END IF
760 IF ( lquery ) THEN
761 CALL cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
762 $ lda, cdummy, -1, rdummy, -1, ierr )
763 lwrk_cgesvj = int( cdummy(1) )
764 CALL cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
765 $ v, ldv, cdummy, -1, ierr )
766 lwrk_cunmlq = int( cdummy(1) )
767 IF ( errest ) THEN
768 optwrk = max( n+lwrk_cgeqp3, lwcon, lwrk_cgesvj,
769 $ n+lwrk_cgelqf, 2*n+lwrk_cgeqrf,
770 $ n+lwrk_cgesvj, n+lwrk_cunmlq )
771 ELSE
772 optwrk = max( n+lwrk_cgeqp3, lwrk_cgesvj,n+lwrk_cgelqf,
773 $ 2*n+lwrk_cgeqrf, n+lwrk_cgesvj,
774 $ n+lwrk_cunmlq )
775 END IF
776 END IF
777 IF ( l2tran .OR. rowpiv ) THEN
778 IF ( errest ) THEN
779 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
780 ELSE
781 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
782 END IF
783 ELSE
784 IF ( errest ) THEN
785 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
786 ELSE
787 minrwrk = max( 7, lrwqp3, lrwsvdj )
788 END IF
789 END IF
790 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
791 ELSE IF ( lsvec .AND. (.NOT.rsvec) ) THEN
792* .. minimal and optimal sizes of the complex workspace if the
793* singular values and the left singular vectors are requested
794 IF ( errest ) THEN
795 minwrk = n + max( lwqp3,lwcon,n+lwqrf,lwsvdj,lwunmqrm )
796 ELSE
797 minwrk = n + max( lwqp3, n+lwqrf, lwsvdj, lwunmqrm )
798 END IF
799 IF ( lquery ) THEN
800 CALL cgesvj( 'L', 'U', 'N', n,n, u, ldu, sva, n, a,
801 $ lda, cdummy, -1, rdummy, -1, ierr )
802 lwrk_cgesvj = int( cdummy(1) )
803 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
804 $ ldu, cdummy, -1, ierr )
805 lwrk_cunmqrm = int( cdummy(1) )
806 IF ( errest ) THEN
807 optwrk = n + max( lwrk_cgeqp3, lwcon, n+lwrk_cgeqrf,
808 $ lwrk_cgesvj, lwrk_cunmqrm )
809 ELSE
810 optwrk = n + max( lwrk_cgeqp3, n+lwrk_cgeqrf,
811 $ lwrk_cgesvj, lwrk_cunmqrm )
812 END IF
813 END IF
814 IF ( l2tran .OR. rowpiv ) THEN
815 IF ( errest ) THEN
816 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
817 ELSE
818 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj )
819 END IF
820 ELSE
821 IF ( errest ) THEN
822 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
823 ELSE
824 minrwrk = max( 7, lrwqp3, lrwsvdj )
825 END IF
826 END IF
827 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
828 ELSE
829* .. minimal and optimal sizes of the complex workspace if the
830* full SVD is requested
831 IF ( .NOT. jracc ) THEN
832 IF ( errest ) THEN
833 minwrk = max( n+lwqp3, n+lwcon, 2*n+n**2+lwcon,
834 $ 2*n+lwqrf, 2*n+lwqp3,
835 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
836 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
837 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
838 $ n+n**2+lwsvdj, n+lwunmqrm )
839 ELSE
840 minwrk = max( n+lwqp3, 2*n+n**2+lwcon,
841 $ 2*n+lwqrf, 2*n+lwqp3,
842 $ 2*n+n**2+n+lwlqf, 2*n+n**2+n+n**2+lwcon,
843 $ 2*n+n**2+n+lwsvdj, 2*n+n**2+n+lwsvdjv,
844 $ 2*n+n**2+n+lwunmqr,2*n+n**2+n+lwunmlq,
845 $ n+n**2+lwsvdj, n+lwunmqrm )
846 END IF
847 miniwrk = miniwrk + n
848 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
849 ELSE
850 IF ( errest ) THEN
851 minwrk = max( n+lwqp3, n+lwcon, 2*n+lwqrf,
852 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
853 $ n+lwunmqrm )
854 ELSE
855 minwrk = max( n+lwqp3, 2*n+lwqrf,
856 $ 2*n+n**2+lwsvdjv, 2*n+n**2+n+lwunmqr,
857 $ n+lwunmqrm )
858 END IF
859 IF ( rowpiv .OR. l2tran ) miniwrk = miniwrk + m
860 END IF
861 IF ( lquery ) THEN
862 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy, u,
863 $ ldu, cdummy, -1, ierr )
864 lwrk_cunmqrm = int( cdummy(1) )
865 CALL cunmqr( 'L', 'N', n, n, n, a, lda, cdummy, u,
866 $ ldu, cdummy, -1, ierr )
867 lwrk_cunmqr = int( cdummy(1) )
868 IF ( .NOT. jracc ) THEN
869 CALL cgeqp3( n,n, a, lda, iwork, cdummy,cdummy,
870 $ -1,
871 $ rdummy, ierr )
872 lwrk_cgeqp3n = int( cdummy(1) )
873 CALL cgesvj( 'L', 'U', 'N', n, n, u, ldu, sva,
874 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
875 lwrk_cgesvj = int( cdummy(1) )
876 CALL cgesvj( 'U', 'U', 'N', n, n, u, ldu, sva,
877 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
878 lwrk_cgesvju = int( cdummy(1) )
879 CALL cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
880 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
881 lwrk_cgesvjv = int( cdummy(1) )
882 CALL cunmlq( 'L', 'C', n, n, n, a, lda, cdummy,
883 $ v, ldv, cdummy, -1, ierr )
884 lwrk_cunmlq = int( cdummy(1) )
885 IF ( errest ) THEN
886 optwrk = max( n+lwrk_cgeqp3, n+lwcon,
887 $ 2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,
888 $ 2*n+lwrk_cgeqp3n,
889 $ 2*n+n**2+n+lwrk_cgelqf,
890 $ 2*n+n**2+n+n**2+lwcon,
891 $ 2*n+n**2+n+lwrk_cgesvj,
892 $ 2*n+n**2+n+lwrk_cgesvjv,
893 $ 2*n+n**2+n+lwrk_cunmqr,
894 $ 2*n+n**2+n+lwrk_cunmlq,
895 $ n+n**2+lwrk_cgesvju,
896 $ n+lwrk_cunmqrm )
897 ELSE
898 optwrk = max( n+lwrk_cgeqp3,
899 $ 2*n+n**2+lwcon, 2*n+lwrk_cgeqrf,
900 $ 2*n+lwrk_cgeqp3n,
901 $ 2*n+n**2+n+lwrk_cgelqf,
902 $ 2*n+n**2+n+n**2+lwcon,
903 $ 2*n+n**2+n+lwrk_cgesvj,
904 $ 2*n+n**2+n+lwrk_cgesvjv,
905 $ 2*n+n**2+n+lwrk_cunmqr,
906 $ 2*n+n**2+n+lwrk_cunmlq,
907 $ n+n**2+lwrk_cgesvju,
908 $ n+lwrk_cunmqrm )
909 END IF
910 ELSE
911 CALL cgesvj( 'L', 'U', 'V', n, n, u, ldu, sva,
912 $ n, v, ldv, cdummy, -1, rdummy, -1, ierr )
913 lwrk_cgesvjv = int( cdummy(1) )
914 CALL cunmqr( 'L', 'N', n, n, n, cdummy, n,
915 $ cdummy,
916 $ v, ldv, cdummy, -1, ierr )
917 lwrk_cunmqr = int( cdummy(1) )
918 CALL cunmqr( 'L', 'N', m, n, n, a, lda, cdummy,
919 $ u,
920 $ ldu, cdummy, -1, ierr )
921 lwrk_cunmqrm = int( cdummy(1) )
922 IF ( errest ) THEN
923 optwrk = max( n+lwrk_cgeqp3, n+lwcon,
924 $ 2*n+lwrk_cgeqrf, 2*n+n**2,
925 $ 2*n+n**2+lwrk_cgesvjv,
926 $ 2*n+n**2+n+lwrk_cunmqr,n+lwrk_cunmqrm )
927 ELSE
928 optwrk = max( n+lwrk_cgeqp3, 2*n+lwrk_cgeqrf,
929 $ 2*n+n**2, 2*n+n**2+lwrk_cgesvjv,
930 $ 2*n+n**2+n+lwrk_cunmqr,
931 $ n+lwrk_cunmqrm )
932 END IF
933 END IF
934 END IF
935 IF ( l2tran .OR. rowpiv ) THEN
936 minrwrk = max( 7, 2*m, lrwqp3, lrwsvdj, lrwcon )
937 ELSE
938 minrwrk = max( 7, lrwqp3, lrwsvdj, lrwcon )
939 END IF
940 END IF
941 minwrk = max( 2, minwrk )
942 optwrk = max( optwrk, minwrk )
943 IF ( lwork .LT. minwrk .AND. (.NOT.lquery) ) info = - 17
944 IF ( lrwork .LT. minrwrk .AND. (.NOT.lquery) ) info = - 19
945 END IF
946*
947 IF ( info .NE. 0 ) THEN
948* #:(
949 CALL xerbla( 'CGEJSV', - info )
950 RETURN
951 ELSE IF ( lquery ) THEN
952 cwork(1) = cmplx( optwrk )
953 cwork(2) = cmplx( minwrk )
954 rwork(1) = real( minrwrk )
955 iwork(1) = max( 4, miniwrk )
956 RETURN
957 END IF
958*
959* Quick return for void matrix (Y3K safe)
960* #:)
961 IF ( ( m .EQ. 0 ) .OR. ( n .EQ. 0 ) ) THEN
962 iwork(1:4) = 0
963 rwork(1:7) = 0
964 RETURN
965 ENDIF
966*
967* Determine whether the matrix U should be M x N or M x M
968*
969 IF ( lsvec ) THEN
970 n1 = n
971 IF ( lsame( jobu, 'F' ) ) n1 = m
972 END IF
973*
974* Set numerical parameters
975*
976*! NOTE: Make sure SLAMCH() does not fail on the target architecture.
977*
978 epsln = slamch('Epsilon')
979 sfmin = slamch('SafeMinimum')
980 small = sfmin / epsln
981 big = slamch('O')
982* BIG = ONE / SFMIN
983*
984* Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
985*
986*(!) If necessary, scale SVA() to protect the largest norm from
987* overflow. It is possible that this scaling pushes the smallest
988* column norm left from the underflow threshold (extreme case).
989*
990 scalem = one / sqrt(real(m)*real(n))
991 noscal = .true.
992 goscal = .true.
993 DO 1874 p = 1, n
994 aapp = zero
995 aaqq = one
996 CALL classq( m, a(1,p), 1, aapp, aaqq )
997 IF ( aapp .GT. big ) THEN
998 info = - 9
999 CALL xerbla( 'CGEJSV', -info )
1000 RETURN
1001 END IF
1002 aaqq = sqrt(aaqq)
1003 IF ( ( aapp .LT. (big / aaqq) ) .AND. noscal ) THEN
1004 sva(p) = aapp * aaqq
1005 ELSE
1006 noscal = .false.
1007 sva(p) = aapp * ( aaqq * scalem )
1008 IF ( goscal ) THEN
1009 goscal = .false.
1010 CALL sscal( p-1, scalem, sva, 1 )
1011 END IF
1012 END IF
1013 1874 CONTINUE
1014*
1015 IF ( noscal ) scalem = one
1016*
1017 aapp = zero
1018 aaqq = big
1019 DO 4781 p = 1, n
1020 aapp = max( aapp, sva(p) )
1021 IF ( sva(p) .NE. zero ) aaqq = min( aaqq, sva(p) )
1022 4781 CONTINUE
1023*
1024* Quick return for zero M x N matrix
1025* #:)
1026 IF ( aapp .EQ. zero ) THEN
1027 IF ( lsvec ) CALL claset( 'G', m, n1, czero, cone, u, ldu )
1028 IF ( rsvec ) CALL claset( 'G', n, n, czero, cone, v, ldv )
1029 rwork(1) = one
1030 rwork(2) = one
1031 IF ( errest ) rwork(3) = one
1032 IF ( lsvec .AND. rsvec ) THEN
1033 rwork(4) = one
1034 rwork(5) = one
1035 END IF
1036 IF ( l2tran ) THEN
1037 rwork(6) = zero
1038 rwork(7) = zero
1039 END IF
1040 iwork(1) = 0
1041 iwork(2) = 0
1042 iwork(3) = 0
1043 iwork(4) = -1
1044 RETURN
1045 END IF
1046*
1047* Issue warning if denormalized column norms detected. Override the
1048* high relative accuracy request. Issue licence to kill nonzero columns
1049* (set them to zero) whose norm is less than sigma_max / BIG (roughly).
1050* #:(
1051 warning = 0
1052 IF ( aaqq .LE. sfmin ) THEN
1053 l2rank = .true.
1054 l2kill = .true.
1055 warning = 1
1056 END IF
1057*
1058* Quick return for one-column matrix
1059* #:)
1060 IF ( n .EQ. 1 ) THEN
1061*
1062 IF ( lsvec ) THEN
1063 CALL clascl( 'G',0,0,sva(1),scalem, m,1,a(1,1),lda,ierr )
1064 CALL clacpy( 'A', m, 1, a, lda, u, ldu )
1065* computing all M left singular vectors of the M x 1 matrix
1066 IF ( n1 .NE. n ) THEN
1067 CALL cgeqrf( m, n, u,ldu, cwork, cwork(n+1),lwork-n,
1068 $ ierr )
1069 CALL cungqr( m,n1,1, u,ldu,cwork,cwork(n+1),lwork-n,
1070 $ ierr )
1071 CALL ccopy( m, a(1,1), 1, u(1,1), 1 )
1072 END IF
1073 END IF
1074 IF ( rsvec ) THEN
1075 v(1,1) = cone
1076 END IF
1077 IF ( sva(1) .LT. (big*scalem) ) THEN
1078 sva(1) = sva(1) / scalem
1079 scalem = one
1080 END IF
1081 rwork(1) = one / scalem
1082 rwork(2) = one
1083 IF ( sva(1) .NE. zero ) THEN
1084 iwork(1) = 1
1085 IF ( ( sva(1) / scalem) .GE. sfmin ) THEN
1086 iwork(2) = 1
1087 ELSE
1088 iwork(2) = 0
1089 END IF
1090 ELSE
1091 iwork(1) = 0
1092 iwork(2) = 0
1093 END IF
1094 iwork(3) = 0
1095 iwork(4) = -1
1096 IF ( errest ) rwork(3) = one
1097 IF ( lsvec .AND. rsvec ) THEN
1098 rwork(4) = one
1099 rwork(5) = one
1100 END IF
1101 IF ( l2tran ) THEN
1102 rwork(6) = zero
1103 rwork(7) = zero
1104 END IF
1105 RETURN
1106*
1107 END IF
1108*
1109 transp = .false.
1110*
1111 aatmax = -one
1112 aatmin = big
1113 IF ( rowpiv .OR. l2tran ) THEN
1114*
1115* Compute the row norms, needed to determine row pivoting sequence
1116* (in the case of heavily row weighted A, row pivoting is strongly
1117* advised) and to collect information needed to compare the
1118* structures of A * A^* and A^* * A (in the case L2TRAN.EQ..TRUE.).
1119*
1120 IF ( l2tran ) THEN
1121 DO 1950 p = 1, m
1122 xsc = zero
1123 temp1 = one
1124 CALL classq( n, a(p,1), lda, xsc, temp1 )
1125* CLASSQ gets both the ell_2 and the ell_infinity norm
1126* in one pass through the vector
1127 rwork(m+p) = xsc * scalem
1128 rwork(p) = xsc * (scalem*sqrt(temp1))
1129 aatmax = max( aatmax, rwork(p) )
1130 IF (rwork(p) .NE. zero)
1131 $ aatmin = min(aatmin,rwork(p))
1132 1950 CONTINUE
1133 ELSE
1134 DO 1904 p = 1, m
1135 rwork(m+p) = scalem*abs( a(p,icamax(n,a(p,1),lda)) )
1136 aatmax = max( aatmax, rwork(m+p) )
1137 aatmin = min( aatmin, rwork(m+p) )
1138 1904 CONTINUE
1139 END IF
1140*
1141 END IF
1142*
1143* For square matrix A try to determine whether A^* would be better
1144* input for the preconditioned Jacobi SVD, with faster convergence.
1145* The decision is based on an O(N) function of the vector of column
1146* and row norms of A, based on the Shannon entropy. This should give
1147* the right choice in most cases when the difference actually matters.
1148* It may fail and pick the slower converging side.
1149*
1150 entra = zero
1151 entrat = zero
1152 IF ( l2tran ) THEN
1153*
1154 xsc = zero
1155 temp1 = one
1156 CALL slassq( n, sva, 1, xsc, temp1 )
1157 temp1 = one / temp1
1158*
1159 entra = zero
1160 DO 1113 p = 1, n
1161 big1 = ( ( sva(p) / xsc )**2 ) * temp1
1162 IF ( big1 .NE. zero ) entra = entra + big1 * alog(big1)
1163 1113 CONTINUE
1164 entra = - entra / alog(real(n))
1165*
1166* Now, SVA().^2/Trace(A^* * A) is a point in the probability simplex.
1167* It is derived from the diagonal of A^* * A. Do the same with the
1168* diagonal of A * A^*, compute the entropy of the corresponding
1169* probability distribution. Note that A * A^* and A^* * A have the
1170* same trace.
1171*
1172 entrat = zero
1173 DO 1114 p = 1, m
1174 big1 = ( ( rwork(p) / xsc )**2 ) * temp1
1175 IF ( big1 .NE. zero ) entrat = entrat + big1 * alog(big1)
1176 1114 CONTINUE
1177 entrat = - entrat / alog(real(m))
1178*
1179* Analyze the entropies and decide A or A^*. Smaller entropy
1180* usually means better input for the algorithm.
1181*
1182 transp = ( entrat .LT. entra )
1183*
1184* If A^* is better than A, take the adjoint of A. This is allowed
1185* only for square matrices, M=N.
1186 IF ( transp ) THEN
1187* In an optimal implementation, this trivial transpose
1188* should be replaced with faster transpose.
1189 DO 1115 p = 1, n - 1
1190 a(p,p) = conjg(a(p,p))
1191 DO 1116 q = p + 1, n
1192 ctemp = conjg(a(q,p))
1193 a(q,p) = conjg(a(p,q))
1194 a(p,q) = ctemp
1195 1116 CONTINUE
1196 1115 CONTINUE
1197 a(n,n) = conjg(a(n,n))
1198 DO 1117 p = 1, n
1199 rwork(m+p) = sva(p)
1200 sva(p) = rwork(p)
1201* previously computed row 2-norms are now column 2-norms
1202* of the transposed matrix
1203 1117 CONTINUE
1204 temp1 = aapp
1205 aapp = aatmax
1206 aatmax = temp1
1207 temp1 = aaqq
1208 aaqq = aatmin
1209 aatmin = temp1
1210 kill = lsvec
1211 lsvec = rsvec
1212 rsvec = kill
1213 IF ( lsvec ) n1 = n
1214*
1215 rowpiv = .true.
1216 END IF
1217*
1218 END IF
1219* END IF L2TRAN
1220*
1221* Scale the matrix so that its maximal singular value remains less
1222* than SQRT(BIG) -- the matrix is scaled so that its maximal column
1223* has Euclidean norm equal to SQRT(BIG/N). The only reason to keep
1224* SQRT(BIG) instead of BIG is the fact that CGEJSV uses LAPACK and
1225* BLAS routines that, in some implementations, are not capable of
1226* working in the full interval [SFMIN,BIG] and that they may provoke
1227* overflows in the intermediate results. If the singular values spread
1228* from SFMIN to BIG, then CGESVJ will compute them. So, in that case,
1229* one should use CGESVJ instead of CGEJSV.
1230 big1 = sqrt( big )
1231 temp1 = sqrt( big / real(n) )
1232* >> for future updates: allow bigger range, i.e. the largest column
1233* will be allowed up to BIG/N and CGESVJ will do the rest. However, for
1234* this all other (LAPACK) components must allow such a range.
1235* TEMP1 = BIG/REAL(N)
1236* TEMP1 = BIG * EPSLN this should 'almost' work with current LAPACK components
1237 CALL slascl( 'G', 0, 0, aapp, temp1, n, 1, sva, n, ierr )
1238 IF ( aaqq .GT. (aapp * sfmin) ) THEN
1239 aaqq = ( aaqq / aapp ) * temp1
1240 ELSE
1241 aaqq = ( aaqq * temp1 ) / aapp
1242 END IF
1243 temp1 = temp1 * scalem
1244 CALL clascl( 'G', 0, 0, aapp, temp1, m, n, a, lda, ierr )
1245*
1246* To undo scaling at the end of this procedure, multiply the
1247* computed singular values with USCAL2 / USCAL1.
1248*
1249 uscal1 = temp1
1250 uscal2 = aapp
1251*
1252 IF ( l2kill ) THEN
1253* L2KILL enforces computation of nonzero singular values in
1254* the restricted range of condition number of the initial A,
1255* sigma_max(A) / sigma_min(A) approx. SQRT(BIG)/SQRT(SFMIN).
1256 xsc = sqrt( sfmin )
1257 ELSE
1258 xsc = small
1259*
1260* Now, if the condition number of A is too big,
1261* sigma_max(A) / sigma_min(A) .GT. SQRT(BIG/N) * EPSLN / SFMIN,
1262* as a precaution measure, the full SVD is computed using CGESVJ
1263* with accumulated Jacobi rotations. This provides numerically
1264* more robust computation, at the cost of slightly increased run
1265* time. Depending on the concrete implementation of BLAS and LAPACK
1266* (i.e. how they behave in presence of extreme ill-conditioning) the
1267* implementor may decide to remove this switch.
1268 IF ( ( aaqq.LT.sqrt(sfmin) ) .AND. lsvec .AND. rsvec ) THEN
1269 jracc = .true.
1270 END IF
1271*
1272 END IF
1273 IF ( aaqq .LT. xsc ) THEN
1274 DO 700 p = 1, n
1275 IF ( sva(p) .LT. xsc ) THEN
1276 CALL claset( 'A', m, 1, czero, czero, a(1,p), lda )
1277 sva(p) = zero
1278 END IF
1279 700 CONTINUE
1280 END IF
1281*
1282* Preconditioning using QR factorization with pivoting
1283*
1284 IF ( rowpiv ) THEN
1285* Optional row permutation (Bjoerck row pivoting):
1286* A result by Cox and Higham shows that the Bjoerck's
1287* row pivoting combined with standard column pivoting
1288* has similar effect as Powell-Reid complete pivoting.
1289* The ell-infinity norms of A are made nonincreasing.
1290 IF ( ( lsvec .AND. rsvec ) .AND. .NOT.( jracc ) ) THEN
1291 iwoff = 2*n
1292 ELSE
1293 iwoff = n
1294 END IF
1295 DO 1952 p = 1, m - 1
1296 q = isamax( m-p+1, rwork(m+p), 1 ) + p - 1
1297 iwork(iwoff+p) = q
1298 IF ( p .NE. q ) THEN
1299 temp1 = rwork(m+p)
1300 rwork(m+p) = rwork(m+q)
1301 rwork(m+q) = temp1
1302 END IF
1303 1952 CONTINUE
1304 CALL claswp( n, a, lda, 1, m-1, iwork(iwoff+1), 1 )
1305 END IF
1306*
1307* End of the preparation phase (scaling, optional sorting and
1308* transposing, optional flushing of small columns).
1309*
1310* Preconditioning
1311*
1312* If the full SVD is needed, the right singular vectors are computed
1313* from a matrix equation, and for that we need theoretical analysis
1314* of the Businger-Golub pivoting. So we use CGEQP3 as the first RR QRF.
1315* In all other cases the first RR QRF can be chosen by other criteria
1316* (eg speed by replacing global with restricted window pivoting, such
1317* as in xGEQPX from TOMS # 782). Good results will be obtained using
1318* xGEQPX with properly (!) chosen numerical parameters.
1319* Any improvement of CGEQP3 improves overall performance of CGEJSV.
1320*
1321* A * P1 = Q1 * [ R1^* 0]^*:
1322 DO 1963 p = 1, n
1323* .. all columns are free columns
1324 iwork(p) = 0
1325 1963 CONTINUE
1326 CALL cgeqp3( m, n, a, lda, iwork, cwork, cwork(n+1), lwork-n,
1327 $ rwork, ierr )
1328*
1329* The upper triangular matrix R1 from the first QRF is inspected for
1330* rank deficiency and possibilities for deflation, or possible
1331* ill-conditioning. Depending on the user specified flag L2RANK,
1332* the procedure explores possibilities to reduce the numerical
1333* rank by inspecting the computed upper triangular factor. If
1334* L2RANK or L2ABER are up, then CGEJSV will compute the SVD of
1335* A + dA, where ||dA|| <= f(M,N)*EPSLN.
1336*
1337 nr = 1
1338 IF ( l2aber ) THEN
1339* Standard absolute error bound suffices. All sigma_i with
1340* sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
1341* aggressive enforcement of lower numerical rank by introducing a
1342* backward error of the order of N*EPSLN*||A||.
1343 temp1 = sqrt(real(n))*epsln
1344 DO 3001 p = 2, n
1345 IF ( abs(a(p,p)) .GE. (temp1*abs(a(1,1))) ) THEN
1346 nr = nr + 1
1347 ELSE
1348 GO TO 3002
1349 END IF
1350 3001 CONTINUE
1351 3002 CONTINUE
1352 ELSE IF ( l2rank ) THEN
1353* .. similarly as above, only slightly more gentle (less aggressive).
1354* Sudden drop on the diagonal of R1 is used as the criterion for
1355* close-to-rank-deficient.
1356 temp1 = sqrt(sfmin)
1357 DO 3401 p = 2, n
1358 IF ( ( abs(a(p,p)) .LT. (epsln*abs(a(p-1,p-1))) ) .OR.
1359 $ ( abs(a(p,p)) .LT. small ) .OR.
1360 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3402
1361 nr = nr + 1
1362 3401 CONTINUE
1363 3402 CONTINUE
1364*
1365 ELSE
1366* The goal is high relative accuracy. However, if the matrix
1367* has high scaled condition number the relative accuracy is in
1368* general not feasible. Later on, a condition number estimator
1369* will be deployed to estimate the scaled condition number.
1370* Here we just remove the underflowed part of the triangular
1371* factor. This prevents the situation in which the code is
1372* working hard to get the accuracy not warranted by the data.
1373 temp1 = sqrt(sfmin)
1374 DO 3301 p = 2, n
1375 IF ( ( abs(a(p,p)) .LT. small ) .OR.
1376 $ ( l2kill .AND. (abs(a(p,p)) .LT. temp1) ) ) GO TO 3302
1377 nr = nr + 1
1378 3301 CONTINUE
1379 3302 CONTINUE
1380*
1381 END IF
1382*
1383 almort = .false.
1384 IF ( nr .EQ. n ) THEN
1385 maxprj = one
1386 DO 3051 p = 2, n
1387 temp1 = abs(a(p,p)) / sva(iwork(p))
1388 maxprj = min( maxprj, temp1 )
1389 3051 CONTINUE
1390 IF ( maxprj**2 .GE. one - real(n)*epsln ) almort = .true.
1391 END IF
1392*
1393*
1394 sconda = - one
1395 condr1 = - one
1396 condr2 = - one
1397*
1398 IF ( errest ) THEN
1399 IF ( n .EQ. nr ) THEN
1400 IF ( rsvec ) THEN
1401* .. V is available as workspace
1402 CALL clacpy( 'U', n, n, a, lda, v, ldv )
1403 DO 3053 p = 1, n
1404 temp1 = sva(iwork(p))
1405 CALL csscal( p, one/temp1, v(1,p), 1 )
1406 3053 CONTINUE
1407 IF ( lsvec )THEN
1408 CALL cpocon( 'U', n, v, ldv, one, temp1,
1409 $ cwork(n+1), rwork, ierr )
1410 ELSE
1411 CALL cpocon( 'U', n, v, ldv, one, temp1,
1412 $ cwork, rwork, ierr )
1413 END IF
1414*
1415 ELSE IF ( lsvec ) THEN
1416* .. U is available as workspace
1417 CALL clacpy( 'U', n, n, a, lda, u, ldu )
1418 DO 3054 p = 1, n
1419 temp1 = sva(iwork(p))
1420 CALL csscal( p, one/temp1, u(1,p), 1 )
1421 3054 CONTINUE
1422 CALL cpocon( 'U', n, u, ldu, one, temp1,
1423 $ cwork(n+1), rwork, ierr )
1424 ELSE
1425 CALL clacpy( 'U', n, n, a, lda, cwork, n )
1426*[] CALL CLACPY( 'U', N, N, A, LDA, CWORK(N+1), N )
1427* Change: here index shifted by N to the left, CWORK(1:N)
1428* not needed for SIGMA only computation
1429 DO 3052 p = 1, n
1430 temp1 = sva(iwork(p))
1431*[] CALL CSSCAL( p, ONE/TEMP1, CWORK(N+(p-1)*N+1), 1 )
1432 CALL csscal( p, one/temp1, cwork((p-1)*n+1), 1 )
1433 3052 CONTINUE
1434* .. the columns of R are scaled to have unit Euclidean lengths.
1435*[] CALL CPOCON( 'U', N, CWORK(N+1), N, ONE, TEMP1,
1436*[] $ CWORK(N+N*N+1), RWORK, IERR )
1437 CALL cpocon( 'U', n, cwork, n, one, temp1,
1438 $ cwork(n*n+1), rwork, ierr )
1439*
1440 END IF
1441 IF ( temp1 .NE. zero ) THEN
1442 sconda = one / sqrt(temp1)
1443 ELSE
1444 sconda = - one
1445 END IF
1446* SCONDA is an estimate of SQRT(||(R^* * R)^(-1)||_1).
1447* N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
1448 ELSE
1449 sconda = - one
1450 END IF
1451 END IF
1452*
1453 l2pert = l2pert .AND. ( abs( a(1,1)/a(nr,nr) ) .GT. sqrt(big1) )
1454* If there is no violent scaling, artificial perturbation is not needed.
1455*
1456* Phase 3:
1457*
1458 IF ( .NOT. ( rsvec .OR. lsvec ) ) THEN
1459*
1460* Singular Values only
1461*
1462* .. transpose A(1:NR,1:N)
1463 DO 1946 p = 1, min( n-1, nr )
1464 CALL ccopy( n-p, a(p,p+1), lda, a(p+1,p), 1 )
1465 CALL clacgv( n-p+1, a(p,p), 1 )
1466 1946 CONTINUE
1467 IF ( nr .EQ. n ) a(n,n) = conjg(a(n,n))
1468*
1469* The following two DO-loops introduce small relative perturbation
1470* into the strict upper triangle of the lower triangular matrix.
1471* Small entries below the main diagonal are also changed.
1472* This modification is useful if the computing environment does not
1473* provide/allow FLUSH TO ZERO underflow, for it prevents many
1474* annoying denormalized numbers in case of strongly scaled matrices.
1475* The perturbation is structured so that it does not introduce any
1476* new perturbation of the singular values, and it does not destroy
1477* the job done by the preconditioner.
1478* The licence for this perturbation is in the variable L2PERT, which
1479* should be .FALSE. if FLUSH TO ZERO underflow is active.
1480*
1481 IF ( .NOT. almort ) THEN
1482*
1483 IF ( l2pert ) THEN
1484* XSC = SQRT(SMALL)
1485 xsc = epsln / real(n)
1486 DO 4947 q = 1, nr
1487 ctemp = cmplx(xsc*abs(a(q,q)),zero)
1488 DO 4949 p = 1, n
1489 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1490 $ .OR. ( p .LT. q ) )
1491* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1492 $ a(p,q) = ctemp
1493 4949 CONTINUE
1494 4947 CONTINUE
1495 ELSE
1496 CALL claset( 'U', nr-1,nr-1, czero,czero, a(1,2),lda )
1497 END IF
1498*
1499* .. second preconditioning using the QR factorization
1500*
1501 CALL cgeqrf( n,nr, a,lda, cwork, cwork(n+1),lwork-n,
1502 $ ierr )
1503*
1504* .. and transpose upper to lower triangular
1505 DO 1948 p = 1, nr - 1
1506 CALL ccopy( nr-p, a(p,p+1), lda, a(p+1,p), 1 )
1507 CALL clacgv( nr-p+1, a(p,p), 1 )
1508 1948 CONTINUE
1509*
1510 END IF
1511*
1512* Row-cyclic Jacobi SVD algorithm with column pivoting
1513*
1514* .. again some perturbation (a "background noise") is added
1515* to drown denormals
1516 IF ( l2pert ) THEN
1517* XSC = SQRT(SMALL)
1518 xsc = epsln / real(n)
1519 DO 1947 q = 1, nr
1520 ctemp = cmplx(xsc*abs(a(q,q)),zero)
1521 DO 1949 p = 1, nr
1522 IF ( ( (p.GT.q) .AND. (abs(a(p,q)).LE.temp1) )
1523 $ .OR. ( p .LT. q ) )
1524* $ A(p,q) = TEMP1 * ( A(p,q) / ABS(A(p,q)) )
1525 $ a(p,q) = ctemp
1526 1949 CONTINUE
1527 1947 CONTINUE
1528 ELSE
1529 CALL claset( 'U', nr-1, nr-1, czero, czero, a(1,2),
1530 $ lda )
1531 END IF
1532*
1533* .. and one-sided Jacobi rotations are started on a lower
1534* triangular matrix (plus perturbation which is ignored in
1535* the part which destroys triangular form (confusing?!))
1536*
1537 CALL cgesvj( 'L', 'N', 'N', nr, nr, a, lda, sva,
1538 $ n, v, ldv, cwork, lwork, rwork, lrwork, info )
1539*
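* .. CGESVJ returns the scaling factor of the computed singular values
* in RWORK(1) and the numerical rank in RWORK(2)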
1540 scalem = rwork(1)
1541 numrank = nint(rwork(2))
1542*
1543*
1544 ELSE IF ( ( rsvec .AND. ( .NOT. lsvec ) .AND. ( .NOT. jracc ) )
1545 $ .OR.
1546 $ ( jracc .AND. ( .NOT. lsvec ) .AND. ( nr .NE. n ) ) ) THEN
1547*
1548* -> Singular Values and Right Singular Vectors <-
1549*
1550 IF ( almort ) THEN
1551*
1552* .. in this case NR equals N
1553 DO 1998 p = 1, nr
1554 CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1555 CALL clacgv( n-p+1, v(p,p), 1 )
1556 1998 CONTINUE
1557 CALL claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1558*
1559 CALL cgesvj( 'L','U','N', n, nr, v, ldv, sva, nr, a, lda,
1560 $ cwork, lwork, rwork, lrwork, info )
1561 scalem = rwork(1)
1562 numrank = nint(rwork(2))
1563
1564 ELSE
1565*
1566* .. two more QR factorizations ( one QRF is not enough, two require
1567* accumulated product of Jacobi rotations, three are perfect )
1568*
1569 CALL claset( 'L', nr-1,nr-1, czero, czero, a(2,1), lda )
1570 CALL cgelqf( nr,n, a, lda, cwork, cwork(n+1), lwork-n,
1571 $ ierr)
1572 CALL clacpy( 'L', nr, nr, a, lda, v, ldv )
1573 CALL claset( 'U', nr-1,nr-1, czero, czero, v(1,2), ldv )
1574 CALL cgeqrf( nr, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1575 $ lwork-2*n, ierr )
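* .. copy the conjugate transpose of the upper triangular factor into
* the lower triangle of V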
1576 DO 8998 p = 1, nr
1577 CALL ccopy( nr-p+1, v(p,p), ldv, v(p,p), 1 )
1578 CALL clacgv( nr-p+1, v(p,p), 1 )
1579 8998 CONTINUE
1580 CALL claset('U', nr-1, nr-1, czero, czero, v(1,2), ldv)
1581*
1582 CALL cgesvj( 'L', 'U','N', nr, nr, v,ldv, sva, nr, u,
1583 $ ldu, cwork(n+1), lwork-n, rwork, lrwork, info )
1584 scalem = rwork(1)
1585 numrank = nint(rwork(2))
1586 IF ( nr .LT. n ) THEN
1587 CALL claset( 'A',n-nr, nr, czero,czero, v(nr+1,1),
1588 $ ldv )
1589 CALL claset( 'A',nr, n-nr, czero,czero, v(1,nr+1),
1590 $ ldv )
1591 CALL claset( 'A',n-nr,n-nr,czero,cone, v(nr+1,nr+1),
1592 $ ldv )
1593 END IF
1594*
1595 CALL cunmlq( 'L', 'C', n, n, nr, a, lda, cwork,
1596 $ v, ldv, cwork(n+1), lwork-n, ierr )
1597*
1598 END IF
1599* .. permute the rows of V
1600* DO 8991 p = 1, N
1601* CALL CCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
1602* 8991 CONTINUE
1603* CALL CLACPY( 'All', N, N, A, LDA, V, LDV )
1604 CALL clapmr( .false., n, n, v, ldv, iwork )
1605*
1606 IF ( transp ) THEN
1607 CALL clacpy( 'A', n, n, v, ldv, u, ldu )
1608 END IF
1609*
1610 ELSE IF ( jracc .AND. (.NOT. lsvec) .AND. ( nr.EQ. n ) ) THEN
1611*
1612 CALL claset( 'L', n-1,n-1, czero, czero, a(2,1), lda )
1613*
1614 CALL cgesvj( 'U','N','V', n, n, a, lda, sva, n, v, ldv,
1615 $ cwork, lwork, rwork, lrwork, info )
1616 scalem = rwork(1)
1617 numrank = nint(rwork(2))
1618 CALL clapmr( .false., n, n, v, ldv, iwork )
1619*
1620 ELSE IF ( lsvec .AND. ( .NOT. rsvec ) ) THEN
1621*
1622* .. Singular Values and Left Singular Vectors ..
1623*
1624* .. second preconditioning step to avoid the need to accumulate
1625* Jacobi rotations in the Jacobi iterations.
1626 DO 1965 p = 1, nr
1627 CALL ccopy( n-p+1, a(p,p), lda, u(p,p), 1 )
1628 CALL clacgv( n-p+1, u(p,p), 1 )
1629 1965 CONTINUE
1630 CALL claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1631*
1632 CALL cgeqrf( n, nr, u, ldu, cwork(n+1), cwork(2*n+1),
1633 $ lwork-2*n, ierr )
1634*
1635 DO 1967 p = 1, nr - 1
1636 CALL ccopy( nr-p, u(p,p+1), ldu, u(p+1,p), 1 )
1637 CALL clacgv( n-p+1, u(p,p), 1 )
1638 1967 CONTINUE
1639 CALL claset( 'U', nr-1, nr-1, czero, czero, u(1,2), ldu )
1640*
1641 CALL cgesvj( 'L', 'U', 'N', nr,nr, u, ldu, sva, nr, a,
1642 $ lda, cwork(n+1), lwork-n, rwork, lrwork, info )
1643 scalem = rwork(1)
1644 numrank = nint(rwork(2))
1645*
1646 IF ( nr .LT. m ) THEN
1647 CALL claset( 'A', m-nr, nr,czero, czero, u(nr+1,1),
1648 $ ldu )
1649 IF ( nr .LT. n1 ) THEN
1650 CALL claset( 'A',nr, n1-nr, czero, czero, u(1,nr+1),
1651 $ ldu )
1652 CALL claset( 'A',m-nr,n1-nr,czero,cone,u(nr+1,nr+1),
1653 $ ldu )
1654 END IF
1655 END IF
1656*
1657 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
1658 $ ldu, cwork(n+1), lwork-n, ierr )
1659*
1660 IF ( rowpiv )
1661 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
1662*
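* .. normalize the columns of U to unit Euclidean norm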
1663 DO 1974 p = 1, n1
1664 xsc = one / scnrm2( m, u(1,p), 1 )
1665 CALL csscal( m, xsc, u(1,p), 1 )
1666 1974 CONTINUE
1667*
1668 IF ( transp ) THEN
1669 CALL clacpy( 'A', n, n, u, ldu, v, ldv )
1670 END IF
1671*
1672 ELSE
1673*
1674* .. Full SVD ..
1675*
1676 IF ( .NOT. jracc ) THEN
1677*
1678 IF ( .NOT. almort ) THEN
1679*
1680* Second Preconditioning Step (QRF [with pivoting])
1681* Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
1682* equivalent to an LQF CALL. Since in many libraries the QRF
1683* seems to be better optimized than the LQF, we do explicit
1684* transpose and use the QRF. This is subject to change in an
1685* optimized implementation of CGEJSV.
1686*
1687 DO 1968 p = 1, nr
1688 CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
1689 CALL clacgv( n-p+1, v(p,p), 1 )
1690 1968 CONTINUE
1691*
1692* .. the following two loops perturb small entries to avoid
1693* denormals in the second QR factorization, where they are
1694* as good as zeros. This is done to avoid painfully slow
1695* computation with denormals. The relative size of the perturbation
1696* is a parameter that can be changed by the implementer.
1697* This perturbation device will be obsolete on machines with
1698* properly implemented arithmetic.
1699* To switch it off, set L2PERT=.FALSE. To remove it from the
1700* code, remove the action under L2PERT=.TRUE., leave the ELSE part.
1701* The following two loops should be blocked and fused with the
1702* transposed copy above.
1703*
1704 IF ( l2pert ) THEN
1705 xsc = sqrt(small)
1706 DO 2969 q = 1, nr
1707 ctemp = cmplx(xsc*abs( v(q,q) ),zero)
1708 DO 2968 p = 1, n
1709 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
1710 $ .OR. ( p .LT. q ) )
1711* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
1712 $ v(p,q) = ctemp
1713 IF ( p .LT. q ) v(p,q) = - v(p,q)
1714 2968 CONTINUE
1715 2969 CONTINUE
1716 ELSE
1717 CALL claset( 'U', nr-1, nr-1, czero, czero, v(1,2),
1718 $ ldv )
1719 END IF
1720*
1721* Estimate the row scaled condition number of R1
1722* (If R1 is rectangular, N > NR, then the condition number
1723* of the leading NR x NR submatrix is estimated.)
1724*
1725 CALL clacpy( 'L', nr, nr, v, ldv, cwork(2*n+1), nr )
1726 DO 3950 p = 1, nr
1727 temp1 = scnrm2(nr-p+1,cwork(2*n+(p-1)*nr+p),1)
1728 CALL csscal(nr-p+1,one/temp1,cwork(2*n+(p-1)*nr+p),1)
1729 3950 CONTINUE
1730 CALL cpocon('L',nr,cwork(2*n+1),nr,one,temp1,
1731 $ cwork(2*n+nr*nr+1),rwork,ierr)
1732 condr1 = one / sqrt(temp1)
1733* .. here we need a second opinion on the condition number,
1734* .. and then assume the worst-case scenario
1735* R1 is OK for inverse <=> CONDR1 .LT. REAL(N)
1736* more conservative <=> CONDR1 .LT. SQRT(REAL(N))
1737*
1738 cond_ok = sqrt(sqrt(real(nr)))
1739*[TP] COND_OK is a tuning parameter.
1740*
1741 IF ( condr1 .LT. cond_ok ) THEN
1742* .. the second QRF without pivoting. Note: in an optimized
1743* implementation, this QRF should be implemented as the QRF
1744* of a lower triangular matrix.
1745* R1^* = Q2 * R2
1746 CALL cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
1747 $ lwork-2*n, ierr )
1748*
1749 IF ( l2pert ) THEN
1750 xsc = sqrt(small)/epsln
1751 DO 3959 p = 2, nr
1752 DO 3958 q = 1, p - 1
1753 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1754 $ zero)
1755 IF ( abs(v(q,p)) .LE. temp1 )
1756* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1757 $ v(q,p) = ctemp
1758 3958 CONTINUE
1759 3959 CONTINUE
1760 END IF
1761*
1762 IF ( nr .NE. n )
1763 $ CALL clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1764* .. save ...
1765*
1766* .. this transposed copy should be better than naive
1767 DO 1969 p = 1, nr - 1
1768 CALL ccopy( nr-p, v(p,p+1), ldv, v(p+1,p), 1 )
1769 CALL clacgv(nr-p+1, v(p,p), 1 )
1770 1969 CONTINUE
1771 v(nr,nr)=conjg(v(nr,nr))
1772*
1773 condr2 = condr1
1774*
1775 ELSE
1776*
1777* .. ill-conditioned case: second QRF with pivoting
1778* Note that windowed pivoting would be equally good
1779* numerically, and more run-time efficient. So, in
1780* an optimal implementation, the next call to CGEQP3
1781* should be replaced with, e.g., CALL CGEQPX (ACM TOMS #782)
1782* with properly (carefully) chosen parameters.
1783*
1784* R1^* * P2 = Q2 * R2
1785 DO 3003 p = 1, nr
1786 iwork(n+p) = 0
1787 3003 CONTINUE
1788 CALL cgeqp3( n, nr, v, ldv, iwork(n+1), cwork(n+1),
1789 $ cwork(2*n+1), lwork-2*n, rwork, ierr )
1790** CALL CGEQRF( N, NR, V, LDV, CWORK(N+1), CWORK(2*N+1),
1791** $ LWORK-2*N, IERR )
1792 IF ( l2pert ) THEN
1793 xsc = sqrt(small)
1794 DO 3969 p = 2, nr
1795 DO 3968 q = 1, p - 1
1796 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1797 $ zero)
1798 IF ( abs(v(q,p)) .LE. temp1 )
1799* $ V(q,p) = TEMP1 * ( V(q,p) / ABS(V(q,p)) )
1800 $ v(q,p) = ctemp
1801 3968 CONTINUE
1802 3969 CONTINUE
1803 END IF
1804*
1805 CALL clacpy( 'A', n, nr, v, ldv, cwork(2*n+1), n )
1806*
1807 IF ( l2pert ) THEN
1808 xsc = sqrt(small)
1809 DO 8970 p = 2, nr
1810 DO 8971 q = 1, p - 1
1811 ctemp=cmplx(xsc*min(abs(v(p,p)),abs(v(q,q))),
1812 $ zero)
1813* V(p,q) = - TEMP1*( V(q,p) / ABS(V(q,p)) )
1814 v(p,q) = - ctemp
1815 8971 CONTINUE
1816 8970 CONTINUE
1817 ELSE
1818 CALL claset( 'L',nr-1,nr-1,czero,czero,v(2,1),ldv )
1819 END IF
1820* Now, compute R2 = L3 * Q3, the LQ factorization.
1821 CALL cgelqf( nr, nr, v, ldv, cwork(2*n+n*nr+1),
1822 $ cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr, ierr )
1823* .. and estimate the condition number
1824 CALL clacpy( 'L',nr,nr,v,ldv,cwork(2*n+n*nr+nr+1),nr )
1825 DO 4950 p = 1, nr
1826 temp1 = scnrm2( p, cwork(2*n+n*nr+nr+p), nr )
1827 CALL csscal( p, one/temp1, cwork(2*n+n*nr+nr+p),
1828 $ nr )
1829 4950 CONTINUE
1830 CALL cpocon( 'L',nr,cwork(2*n+n*nr+nr+1),nr,one,temp1,
1831 $ cwork(2*n+n*nr+nr+nr*nr+1),rwork,ierr )
1832 condr2 = one / sqrt(temp1)
1833*
1834*
1835 IF ( condr2 .GE. cond_ok ) THEN
1836* .. save the Householder vectors used for Q3
1837* (this overwrites the copy of R2, as it will not be
1838* needed in this branch, but it does not overwrite the
1839* Householder vectors of Q2).
1840 CALL clacpy( 'U', nr, nr, v, ldv, cwork(2*n+1), n )
1841* .. and the rest of the information on Q3 is in
1842* CWORK(2*N+N*NR+1:2*N+N*NR+N)
1843 END IF
1844*
1845 END IF
1846*
1847 IF ( l2pert ) THEN
1848 xsc = sqrt(small)
1849 DO 4968 q = 2, nr
1850 ctemp = xsc * v(q,q)
1851 DO 4969 p = 1, q - 1
1852* V(p,q) = - TEMP1*( V(p,q) / ABS(V(p,q)) )
1853 v(p,q) = - ctemp
1854 4969 CONTINUE
1855 4968 CONTINUE
1856 ELSE
1857 CALL claset( 'U', nr-1,nr-1, czero,czero, v(1,2),
1858 $ ldv )
1859 END IF
1860*
1861* Second preconditioning finished; continue with Jacobi SVD
1862* The input matrix is lower triangular.
1863*
1864* Recover the right singular vectors as the solution of a well
1865* conditioned triangular matrix equation.
1866*
1867 IF ( condr1 .LT. cond_ok ) THEN
1868*
1869 CALL cgesvj( 'L','U','N',nr,nr,v,ldv,sva,nr,u, ldu,
1870 $ cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,rwork,
1871 $ lrwork, info )
1872 scalem = rwork(1)
1873 numrank = nint(rwork(2))
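* .. copy the vectors computed by CGESVJ into U and scale the columns
* of V by the singular values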
1874 DO 3970 p = 1, nr
1875 CALL ccopy( nr, v(1,p), 1, u(1,p), 1 )
1876 CALL csscal( nr, sva(p), v(1,p), 1 )
1877 3970 CONTINUE
1878
1879* .. pick the right matrix equation and solve it
1880*
1881 IF ( nr .EQ. n ) THEN
1882* :)) .. best case, R1 is inverted. The solution of this matrix
1883* equation is Q2*V2 = the product of the Jacobi rotations
1884* used in CGESVJ, premultiplied with the unitary matrix
1885* from the second QR factorization.
1886 CALL ctrsm('L','U','N','N', nr,nr,cone, a,lda, v,
1887 $ ldv)
1888 ELSE
1889* .. R1 is well conditioned, but non-square. Adjoint of R2
1890* is inverted to get the product of the Jacobi rotations
1891* used in CGESVJ. The Q-factor from the second QR
1892* factorization is then built in explicitly.
1893 CALL ctrsm('L','U','C','N',nr,nr,cone,cwork(2*n+1),
1894 $ n,v,ldv)
1895 IF ( nr .LT. n ) THEN
1896 CALL claset('A',n-nr,nr,czero,czero,v(nr+1,1),ldv)
1897 CALL claset('A',nr,n-nr,czero,czero,v(1,nr+1),ldv)
1898 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),
1899 $ ldv)
1900 END IF
1901 CALL cunmqr('L','N',n,n,nr,cwork(2*n+1),n,
1902 $ cwork(n+1),
1903 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr)
1904 END IF
1905*
1906 ELSE IF ( condr2 .LT. cond_ok ) THEN
1907*
1908* The matrix R2 is inverted. The solution of the matrix equation
1909* is Q3^* * V3 = the product of the Jacobi rotations (applied to
1910* the lower triangular L3 from the LQ factorization of
1911* R2=L3*Q3), pre-multiplied with the conjugate-transposed Q3.
1912 CALL cgesvj( 'L', 'U', 'N', nr, nr, v, ldv, sva, nr,
1913 $ u,
1914 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1915 $ rwork, lrwork, info )
1916 scalem = rwork(1)
1917 numrank = nint(rwork(2))
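* .. copy the vectors computed by CGESVJ into U and scale the columns
* of U by the singular values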
1918 DO 3870 p = 1, nr
1919 CALL ccopy( nr, v(1,p), 1, u(1,p), 1 )
1920 CALL csscal( nr, sva(p), u(1,p), 1 )
1921 3870 CONTINUE
1922 CALL ctrsm('L','U','N','N',nr,nr,cone,cwork(2*n+1),n,
1923 $ u,ldu)
1924* .. apply the permutation from the second QR factorization
1925 DO 873 q = 1, nr
1926 DO 872 p = 1, nr
1927 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1928 872 CONTINUE
1929 DO 874 p = 1, nr
1930 u(p,q) = cwork(2*n+n*nr+nr+p)
1931 874 CONTINUE
1932 873 CONTINUE
1933 IF ( nr .LT. n ) THEN
1934 CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),
1935 $ ldv )
1936 CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),
1937 $ ldv )
1938 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),
1939 $ ldv)
1940 END IF
1941 CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1942 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1943 ELSE
1944* Last line of defense.
1945* #:( This is a rather pathological case: no scaled condition
1946* improvement after two pivoted QR factorizations. Another
1947* possibility is that the rank revealing QR factorization
1948* or the condition estimator has failed, or the COND_OK
1949* is set very close to ONE (which is unnecessary). Normally,
1950* this branch should never be executed, but in rare cases of
1951* failure of the RRQR or condition estimator, the last line of
1952* defense ensures that CGEJSV completes the task.
1953* Compute the full SVD of L3 using CGESVJ with explicit
1954* accumulation of Jacobi rotations.
1955 CALL cgesvj( 'L', 'U', 'V', nr, nr, v, ldv, sva, nr,
1956 $ u,
1957 $ ldu, cwork(2*n+n*nr+nr+1), lwork-2*n-n*nr-nr,
1958 $ rwork, lrwork, info )
1959 scalem = rwork(1)
1960 numrank = nint(rwork(2))
1961 IF ( nr .LT. n ) THEN
1962 CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),
1963 $ ldv )
1964 CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),
1965 $ ldv )
1966 CALL claset('A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),
1967 $ ldv)
1968 END IF
1969 CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
1970 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
1971*
1972 CALL cunmlq( 'L', 'C', nr, nr, nr, cwork(2*n+1), n,
1973 $ cwork(2*n+n*nr+1), u, ldu, cwork(2*n+n*nr+nr+1),
1974 $ lwork-2*n-n*nr-nr, ierr )
1975 DO 773 q = 1, nr
1976 DO 772 p = 1, nr
1977 cwork(2*n+n*nr+nr+iwork(n+p)) = u(p,q)
1978 772 CONTINUE
1979 DO 774 p = 1, nr
1980 u(p,q) = cwork(2*n+n*nr+nr+p)
1981 774 CONTINUE
1982 773 CONTINUE
1983*
1984 END IF
1985*
1986* Permute the rows of V using the (column) permutation from the
1987* first QRF. Also, scale the columns to make them unit in
1988* Euclidean norm. This applies to all cases.
1989*
1990 temp1 = sqrt(real(n)) * epsln
1991 DO 1972 q = 1, n
1992 DO 972 p = 1, n
1993 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
1994 972 CONTINUE
1995 DO 973 p = 1, n
1996 v(p,q) = cwork(2*n+n*nr+nr+p)
1997 973 CONTINUE
1998 xsc = one / scnrm2( n, v(1,q), 1 )
1999 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2000 $ CALL csscal( n, xsc, v(1,q), 1 )
2001 1972 CONTINUE
2002* At this moment, V contains the right singular vectors of A.
2003* Next, assemble the left singular vector matrix U (M x N).
2004 IF ( nr .LT. m ) THEN
2005 CALL claset('A', m-nr, nr, czero, czero, u(nr+1,1),
2006 $ ldu)
2007 IF ( nr .LT. n1 ) THEN
2008 CALL claset('A',nr,n1-nr,czero,czero,u(1,nr+1),ldu)
2009 CALL claset('A',m-nr,n1-nr,czero,cone,
2010 $ u(nr+1,nr+1),ldu)
2011 END IF
2012 END IF
2013*
2014* The Q matrix from the first QRF is built into the left singular
2015* matrix U. This applies to all cases.
2016*
2017 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2018 $ ldu, cwork(n+1), lwork-n, ierr )
2019
2020* The columns of U are normalized. The cost is O(M*N) flops.
2021 temp1 = sqrt(real(m)) * epsln
2022 DO 1973 p = 1, nr
2023 xsc = one / scnrm2( m, u(1,p), 1 )
2024 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2025 $ CALL csscal( m, xsc, u(1,p), 1 )
2026 1973 CONTINUE
2027*
2028* If the initial QRF is computed with row pivoting, the left
2029* singular vectors must be adjusted.
2030*
2031 IF ( rowpiv )
2032 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2033*
2034 ELSE
2035*
2036* .. the initial matrix A has almost orthogonal columns and
2037* the second QRF is not needed
2038*
2039 CALL clacpy( 'U', n, n, a, lda, cwork(n+1), n )
2040 IF ( l2pert ) THEN
2041 xsc = sqrt(small)
2042 DO 5970 p = 2, n
2043 ctemp = xsc * cwork( n + (p-1)*n + p )
2044 DO 5971 q = 1, p - 1
2045* CWORK(N+(q-1)*N+p)=-TEMP1 * ( CWORK(N+(p-1)*N+q) /
2046* $ ABS(CWORK(N+(p-1)*N+q)) )
2047 cwork(n+(q-1)*n+p)=-ctemp
2048 5971 CONTINUE
2049 5970 CONTINUE
2050 ELSE
2051 CALL claset( 'L',n-1,n-1,czero,czero,cwork(n+2),n )
2052 END IF
2053*
2054 CALL cgesvj( 'U', 'U', 'N', n, n, cwork(n+1), n, sva,
2055 $ n, u, ldu, cwork(n+n*n+1), lwork-n-n*n, rwork, lrwork,
2056 $ info )
2057*
2058 scalem = rwork(1)
2059 numrank = nint(rwork(2))
2060 DO 6970 p = 1, n
2061 CALL ccopy( n, cwork(n+(p-1)*n+1), 1, u(1,p), 1 )
2062 CALL csscal( n, sva(p), cwork(n+(p-1)*n+1), 1 )
2063 6970 CONTINUE
2064*
2065 CALL ctrsm( 'L', 'U', 'N', 'N', n, n,
2066 $ cone, a, lda, cwork(n+1), n )
2067 DO 6972 p = 1, n
2068 CALL ccopy( n, cwork(n+p), n, v(iwork(p),1), ldv )
2069 6972 CONTINUE
2070 temp1 = sqrt(real(n))*epsln
2071 DO 6971 p = 1, n
2072 xsc = one / scnrm2( n, v(1,p), 1 )
2073 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2074 $ CALL csscal( n, xsc, v(1,p), 1 )
2075 6971 CONTINUE
2076*
2077* Assemble the left singular vector matrix U (M x N).
2078*
2079 IF ( n .LT. m ) THEN
2080 CALL claset( 'A', m-n, n, czero, czero, u(n+1,1),
2081 $ ldu )
2082 IF ( n .LT. n1 ) THEN
2083 CALL claset('A',n, n1-n, czero, czero, u(1,n+1),
2084 $ ldu)
2085 CALL claset( 'A',m-n,n1-n, czero, cone,u(n+1,n+1),
2086 $ ldu)
2087 END IF
2088 END IF
2089 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2090 $ ldu, cwork(n+1), lwork-n, ierr )
2091 temp1 = sqrt(real(m))*epsln
2092 DO 6973 p = 1, n1
2093 xsc = one / scnrm2( m, u(1,p), 1 )
2094 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2095 $ CALL csscal( m, xsc, u(1,p), 1 )
2096 6973 CONTINUE
2097*
2098 IF ( rowpiv )
2099 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2100*
2101 END IF
2102*
2103* end of the >> almost orthogonal case << in the full SVD
2104*
2105 ELSE
2106*
2107* This branch deploys a preconditioned Jacobi SVD with explicitly
2108* accumulated rotations. It is included as an option, mainly for
2109* experimental purposes. It performs well, and can also be used.
2110* In this implementation, this branch will be automatically activated
2111* if the condition number sigma_max(A) / sigma_min(A) is predicted
2112* to be greater than the overflow threshold. This is because the
2113* a posteriori computation of the singular vectors assumes robust
2114* implementation of BLAS and some LAPACK procedures, capable of working
2115* in the presence of extreme values, e.g. when the singular values spread from
2116* the underflow to the overflow threshold.
2117*
2118 DO 7968 p = 1, nr
2119 CALL ccopy( n-p+1, a(p,p), lda, v(p,p), 1 )
2120 CALL clacgv( n-p+1, v(p,p), 1 )
2121 7968 CONTINUE
2122*
2123 IF ( l2pert ) THEN
2124 xsc = sqrt(small/epsln)
2125 DO 5969 q = 1, nr
2126 ctemp = cmplx(xsc*abs( v(q,q) ),zero)
2127 DO 5968 p = 1, n
2128 IF ( ( p .GT. q ) .AND. ( abs(v(p,q)) .LE. temp1 )
2129 $ .OR. ( p .LT. q ) )
2130* $ V(p,q) = TEMP1 * ( V(p,q) / ABS(V(p,q)) )
2131 $ v(p,q) = ctemp
2132 IF ( p .LT. q ) v(p,q) = - v(p,q)
2133 5968 CONTINUE
2134 5969 CONTINUE
2135 ELSE
2136 CALL claset( 'U', nr-1, nr-1, czero, czero, v(1,2), ldv )
2137 END IF
2138
2139 CALL cgeqrf( n, nr, v, ldv, cwork(n+1), cwork(2*n+1),
2140 $ lwork-2*n, ierr )
2141 CALL clacpy( 'L', n, nr, v, ldv, cwork(2*n+1), n )
2142*
2143 DO 7969 p = 1, nr
2144 CALL ccopy( nr-p+1, v(p,p), ldv, u(p,p), 1 )
2145 CALL clacgv( nr-p+1, u(p,p), 1 )
2146 7969 CONTINUE
2147
2148 IF ( l2pert ) THEN
2149 xsc = sqrt(small/epsln)
2150 DO 9970 q = 2, nr
2151 DO 9971 p = 1, q - 1
2152 ctemp = cmplx(xsc * min(abs(u(p,p)),abs(u(q,q))),
2153 $ zero)
2154* U(p,q) = - TEMP1 * ( U(q,p) / ABS(U(q,p)) )
2155 u(p,q) = - ctemp
2156 9971 CONTINUE
2157 9970 CONTINUE
2158 ELSE
2159 CALL claset('U', nr-1, nr-1, czero, czero, u(1,2), ldu )
2160 END IF
2161
2162 CALL cgesvj( 'L', 'U', 'V', nr, nr, u, ldu, sva,
2163 $ n, v, ldv, cwork(2*n+n*nr+1), lwork-2*n-n*nr,
2164 $ rwork, lrwork, info )
2165 scalem = rwork(1)
2166 numrank = nint(rwork(2))
2167
2168 IF ( nr .LT. n ) THEN
2169 CALL claset( 'A',n-nr,nr,czero,czero,v(nr+1,1),ldv )
2170 CALL claset( 'A',nr,n-nr,czero,czero,v(1,nr+1),ldv )
2171 CALL claset( 'A',n-nr,n-nr,czero,cone,v(nr+1,nr+1),ldv )
2172 END IF
2173
2174 CALL cunmqr( 'L','N',n,n,nr,cwork(2*n+1),n,cwork(n+1),
2175 $ v,ldv,cwork(2*n+n*nr+nr+1),lwork-2*n-n*nr-nr,ierr )
2176*
2177* Permute the rows of V using the (column) permutation from the
2178* first QRF. Also, scale the columns to make them unit in
2179* Euclidean norm. This applies to all cases.
2180*
2181 temp1 = sqrt(real(n)) * epsln
2182 DO 7972 q = 1, n
2183 DO 8972 p = 1, n
2184 cwork(2*n+n*nr+nr+iwork(p)) = v(p,q)
2185 8972 CONTINUE
2186 DO 8973 p = 1, n
2187 v(p,q) = cwork(2*n+n*nr+nr+p)
2188 8973 CONTINUE
2189 xsc = one / scnrm2( n, v(1,q), 1 )
2190 IF ( (xsc .LT. (one-temp1)) .OR. (xsc .GT. (one+temp1)) )
2191 $ CALL csscal( n, xsc, v(1,q), 1 )
2192 7972 CONTINUE
2193*
2194* At this moment, V contains the right singular vectors of A.
2195* Next, assemble the left singular vector matrix U (M x N).
2196*
2197 IF ( nr .LT. m ) THEN
2198 CALL claset( 'A', m-nr, nr, czero, czero, u(nr+1,1),
2199 $ ldu )
2200 IF ( nr .LT. n1 ) THEN
2201 CALL claset('A',nr, n1-nr, czero, czero, u(1,nr+1),
2202 $ ldu)
2203 CALL claset('A',m-nr,n1-nr, czero, cone,u(nr+1,nr+1),
2204 $ ldu)
2205 END IF
2206 END IF
2207*
2208 CALL cunmqr( 'L', 'N', m, n1, n, a, lda, cwork, u,
2209 $ ldu, cwork(n+1), lwork-n, ierr )
2210*
2211 IF ( rowpiv )
2212 $ CALL claswp( n1, u, ldu, 1, m-1, iwork(iwoff+1), -1 )
2213*
2214*
2215 END IF
2216 IF ( transp ) THEN
2217* .. swap U and V because the procedure worked on A^*
2218 DO 6974 p = 1, n
2219 CALL cswap( n, u(1,p), 1, v(1,p), 1 )
2220 6974 CONTINUE
2221 END IF
2222*
2223 END IF
2224* end of the full SVD
2225*
2226* Undo scaling, if necessary (and possible)
2227*
2228 IF ( uscal2 .LE. (big/sva(1))*uscal1 ) THEN
2229 CALL slascl( 'G', 0, 0, uscal1, uscal2, nr, 1, sva, n,
2230 $ ierr )
2231 uscal1 = one
2232 uscal2 = one
2233 END IF
2234*
2235 IF ( nr .LT. n ) THEN
2236 DO 3004 p = nr+1, n
2237 sva(p) = zero
2238 3004 CONTINUE
2239 END IF
2240*
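* .. return the scaling factors (and, if computed, the condition
* estimates) to the caller in RWORK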
2241 rwork(1) = uscal2 * scalem
2242 rwork(2) = uscal1
2243 IF ( errest ) rwork(3) = sconda
2244 IF ( lsvec .AND. rsvec ) THEN
2245 rwork(4) = condr1
2246 rwork(5) = condr2
2247 END IF
2248 IF ( l2tran ) THEN
2249 rwork(6) = entra
2250 rwork(7) = entrat
2251 END IF
2252*
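* .. IWORK(1:3) returns NR, the numerical rank and the warning flag;
* IWORK(4) is set to 1 if the procedure worked on A^* and to -1
* otherwise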
2253 iwork(1) = nr
2254 iwork(2) = numrank
2255 iwork(3) = warning
2256 IF ( transp ) THEN
2257 iwork(4) = 1
2258 ELSE
2259 iwork(4) = -1
2260 END IF
2261
2262*
2263 RETURN
2264* ..
2265* .. END OF CGEJSV
2266* ..
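
The sketch below is an editorial illustration of one way to call CGEJSV; it is not part of the LAPACK source. It assumes the workspace-query convention (LWORK = -1 or LRWORK = -1 returning the required lengths in CWORK(1), RWORK(1) and IWORK(1)); the fallback IWORK length M+3*N is an assumption, and the argument descriptions above take precedence for the exact workspace requirements.

      program cgejsv_example
         implicit none
         integer, parameter   :: m = 6, n = 4
         complex              :: a(m,n), u(m,n), v(n,n), cquery(1)
         real                 :: sva(n), rquery(1)
         integer              :: iquery(4)
         complex, allocatable :: cwork(:)
         real,    allocatable :: rwork(:)
         integer, allocatable :: iwork(:)
         integer              :: lwork, lrwork, liwork, info, i, j
         ! fill A with an arbitrary test pattern
         do j = 1, n
            do i = 1, m
               a(i,j) = cmplx( real(i+j), real(i-j) )
            end do
         end do
         ! workspace query (assumed: LWORK = -1 / LRWORK = -1 return the
         ! required lengths in CWORK(1), RWORK(1) and IWORK(1))
         call cgejsv( 'C', 'U', 'V', 'R', 'N', 'N', m, n, a, m, sva,   &
                      u, m, v, n, cquery, -1, rquery, -1, iquery, info )
         lwork  = int( cquery(1) )
         lrwork = int( rquery(1) )
         liwork = max( 4, m + 3*n, iquery(1) )  ! m+3*n: assumed generous bound
         allocate( cwork(lwork), rwork(lrwork), iwork(liwork) )
         ! compute the SVD: JOBA = 'C', left and right singular vectors
         call cgejsv( 'C', 'U', 'V', 'R', 'N', 'N', m, n, a, m, sva,   &
                      u, m, v, n, cwork, lwork, rwork, lrwork, iwork,  &
                      info )
         if ( info == 0 ) then
            ! SVA(1:N) holds the computed singular values, subject to the
            ! scaling convention documented for RWORK(1:2)
            print *, 'singular values:', sva
         end if
      end program cgejsv_example

With JOBU = 'U' and JOBV = 'V', the first N columns of U and the N-by-N matrix V receive the left and right singular vectors, as described in the argument documentation.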
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81
subroutine cgelqf(m, n, a, lda, tau, work, lwork, info)
CGELQF
Definition cgelqf.f:142
subroutine cgeqp3(m, n, a, lda, jpvt, tau, work, lwork, rwork, info)
CGEQP3
Definition cgeqp3.f:157
subroutine cgeqrf(m, n, a, lda, tau, work, lwork, info)
CGEQRF
Definition cgeqrf.f:144
subroutine cgesvj(joba, jobu, jobv, m, n, a, lda, sva, mv, v, ldv, cwork, lwork, rwork, lrwork, info)
CGESVJ
Definition cgesvj.f:353
integer function isamax(n, sx, incx)
ISAMAX
Definition isamax.f:71
integer function icamax(n, cx, incx)
ICAMAX
Definition icamax.f:71
subroutine clacgv(n, x, incx)
CLACGV conjugates a complex vector.
Definition clacgv.f:72
subroutine clacpy(uplo, m, n, a, lda, b, ldb)
CLACPY copies all or part of one two-dimensional array to another.
Definition clacpy.f:101
real function slamch(cmach)
SLAMCH
Definition slamch.f:68
subroutine clapmr(forwrd, m, n, x, ldx, k)
CLAPMR rearranges rows of a matrix as specified by a permutation vector.
Definition clapmr.f:102
subroutine clascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
CLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition clascl.f:142
subroutine slascl(type, kl, ku, cfrom, cto, m, n, a, lda, info)
SLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom.
Definition slascl.f:142
subroutine claset(uplo, m, n, alpha, beta, a, lda)
CLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition claset.f:104
subroutine slassq(n, x, incx, scale, sumsq)
SLASSQ updates a sum of squares represented in scaled form.
Definition slassq.f90:122
subroutine classq(n, x, incx, scale, sumsq)
CLASSQ updates a sum of squares represented in scaled form.
Definition classq.f90:122
subroutine claswp(n, a, lda, k1, k2, ipiv, incx)
CLASWP performs a series of row interchanges on a general rectangular matrix.
Definition claswp.f:113
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
real(wp) function scnrm2(n, x, incx)
SCNRM2
Definition scnrm2.f90:90
subroutine cpocon(uplo, n, a, lda, anorm, rcond, work, rwork, info)
CPOCON
Definition cpocon.f:119
subroutine csscal(n, sa, cx, incx)
CSSCAL
Definition csscal.f:78
subroutine sscal(n, sa, sx, incx)
SSCAL
Definition sscal.f:79
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81
subroutine ctrsm(side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb)
CTRSM
Definition ctrsm.f:180
subroutine cungqr(m, n, k, a, lda, tau, work, lwork, info)
CUNGQR
Definition cungqr.f:126
subroutine cunmlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
CUNMLQ
Definition cunmlq.f:166
subroutine cunmqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork, info)
CUNMQR
Definition cunmqr.f:166