*  dd/dfe/slasd2_8f_source.html

*> \brief \b SLASD2 merges the two sets of singular values together into a single sorted set. Used by sbdsdc.

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> \htmlonly

*> Download SLASD2 + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasd2.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasd2.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasd2.f">

*> [TXT]</a>

*> \endhtmlonly

*

*  Definition:

*  ===========

*

*       SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,

*                          LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,

*                          IDXC, IDXQ, COLTYP, INFO )

*

*       .. Scalar Arguments ..

*       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE

*       REAL               ALPHA, BETA

*       ..

*       .. Array Arguments ..

*       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),

*      $                   IDXQ( * )

*       REAL               D( * ), DSIGMA( * ), U( LDU, * ),

*      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),

*      $                   Z( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> SLASD2 merges the two sets of singular values together into a single

*> sorted set.  Then it tries to deflate the size of the problem.

*> There are two ways in which deflation can occur:  when two or more

*> singular values are close together or if there is a tiny entry in the

*> Z vector.  For each such occurrence the order of the related secular

*> equation problem is reduced by one.

*>

*> SLASD2 is called from SLASD1.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] NL

*> \verbatim

*>          NL is INTEGER

*>         The row dimension of the upper block.  NL >= 1.

*> \endverbatim

*>

*> \param[in] NR

*> \verbatim

*>          NR is INTEGER

*>         The row dimension of the lower block.  NR >= 1.

*> \endverbatim

*>

*> \param[in] SQRE

*> \verbatim

*>          SQRE is INTEGER

*>         = 0: the lower block is an NR-by-NR square matrix.

*>         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.

*>

*>         The bidiagonal matrix has N = NL + NR + 1 rows and

*>         M = N + SQRE >= N columns.

*> \endverbatim

*>

*> \param[out] K

*> \verbatim

*>          K is INTEGER

*>         Contains the dimension of the non-deflated matrix,

*>         This is the order of the related secular equation. 1 <= K <=N.

*> \endverbatim

*>

*> \param[in,out] D

*> \verbatim

*>          D is REAL array, dimension (N)

*>         On entry D contains the singular values of the two submatrices

*>         to be combined.  On exit D contains the trailing (N-K) updated

*>         singular values (those which were deflated) sorted into

*>         increasing order.

*> \endverbatim

*>

*> \param[out] Z

*> \verbatim

*>          Z is REAL array, dimension (M)

*>         On exit Z contains the updating row vector in the secular

*>         equation.

*> \endverbatim

*>

*> \param[in] ALPHA

*> \verbatim

*>          ALPHA is REAL

*>         Contains the diagonal element associated with the added row.

*> \endverbatim

*>

*> \param[in] BETA

*> \verbatim

*>          BETA is REAL

*>         Contains the off-diagonal element associated with the added

*>         row.

*> \endverbatim

*>

*> \param[in,out] U

*> \verbatim

*>          U is REAL array, dimension (LDU,N)

*>         On entry U contains the left singular vectors of two

*>         submatrices in the two square blocks with corners at (1,1),

*>         (NL, NL), and (NL+2, NL+2), (N,N).

*>         On exit U contains the trailing (N-K) updated left singular

*>         vectors (those which were deflated) in its last N-K columns.

*> \endverbatim

*>

*> \param[in] LDU

*> \verbatim

*>          LDU is INTEGER

*>         The leading dimension of the array U.  LDU >= N.

*> \endverbatim

*>

*> \param[in,out] VT

*> \verbatim

*>          VT is REAL array, dimension (LDVT,M)

*>         On entry VT**T contains the right singular vectors of two

*>         submatrices in the two square blocks with corners at (1,1),

*>         (NL+1, NL+1), and (NL+2, NL+2), (M,M).

*>         On exit VT**T contains the trailing (N-K) updated right singular

*>         vectors (those which were deflated) in its last N-K columns.

*>         In case SQRE =1, the last row of VT spans the right null

*>         space.

*> \endverbatim

*>

*> \param[in] LDVT

*> \verbatim

*>          LDVT is INTEGER

*>         The leading dimension of the array VT.  LDVT >= M.

*> \endverbatim

*>

*> \param[out] DSIGMA

*> \verbatim

*>          DSIGMA is REAL array, dimension (N)

*>         Contains a copy of the diagonal elements (K-1 singular values

*>         and one zero) in the secular equation.

*> \endverbatim

*>

*> \param[out] U2

*> \verbatim

*>          U2 is REAL array, dimension (LDU2,N)

*>         Contains a copy of the first K-1 left singular vectors which

*>         will be used by SLASD3 in a matrix multiply (SGEMM) to solve

*>         for the new left singular vectors. U2 is arranged into four

*>         blocks. The first block contains a column with 1 at NL+1 and

*>         zero everywhere else; the second block contains non-zero

*>         entries only at and above NL; the third contains non-zero

*>         entries only below NL+1; and the fourth is dense.

*> \endverbatim

*>

*> \param[in] LDU2

*> \verbatim

*>          LDU2 is INTEGER

*>         The leading dimension of the array U2.  LDU2 >= N.

*> \endverbatim

*>

*> \param[out] VT2

*> \verbatim

*>          VT2 is REAL array, dimension (LDVT2,M)

*>         VT2**T contains a copy of the first K right singular vectors

*>         which will be used by SLASD3 in a matrix multiply (SGEMM) to

*>         solve for the new right singular vectors. VT2 is arranged into

*>         three blocks. The first block contains a row that corresponds

*>         to the special 0 diagonal element in SIGMA; the second block

*>         contains non-zeros only at and before NL +1; the third block

*>         contains non-zeros only at and after  NL +2.

*> \endverbatim

*>

*> \param[in] LDVT2

*> \verbatim

*>          LDVT2 is INTEGER

*>         The leading dimension of the array VT2.  LDVT2 >= M.

*> \endverbatim

*>

*> \param[out] IDXP

*> \verbatim

*>          IDXP is INTEGER array, dimension (N)

*>         This will contain the permutation used to place deflated

*>         values of D at the end of the array. On output IDXP(2:K)

*>         points to the nondeflated D-values and IDXP(K+1:N)

*>         points to the deflated singular values.

*> \endverbatim

*>

*> \param[out] IDX

*> \verbatim

*>          IDX is INTEGER array, dimension (N)

*>         This will contain the permutation used to sort the contents of

*>         D into ascending order.

*> \endverbatim

*>

*> \param[out] IDXC

*> \verbatim

*>          IDXC is INTEGER array, dimension (N)

*>         This will contain the permutation used to arrange the columns

*>         of the deflated U matrix into three groups:  the first group

*>         contains non-zero entries only at and above NL, the second

*>         contains non-zero entries only below NL+2, and the third is

*>         dense.

*> \endverbatim

*>

*> \param[in,out] IDXQ

*> \verbatim

*>          IDXQ is INTEGER array, dimension (N)

*>         This contains the permutation which separately sorts the two

*>         sub-problems in D into ascending order.  Note that entries in

*>         the first half of this permutation must first be moved one

*>         position backward; and entries in the second half

*>         must first have NL+1 added to their values.

*> \endverbatim

*>

*> \param[out] COLTYP

*> \verbatim

*>          COLTYP is INTEGER array, dimension (N)

*>         As workspace, this will contain a label which will indicate

*>         which of the following types a column in the U2 matrix or a

*>         row in the VT2 matrix is:

*>         1 : non-zero in the upper half only

*>         2 : non-zero in the lower half only

*>         3 : dense

*>         4 : deflated

*>

*>         On exit, it is an array of dimension 4, with COLTYP(I) being

*>         the dimension of the I-th type columns.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit.

*>          < 0:  if INFO = -i, the i-th argument had an illegal value.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \date September 2012

*

*> \ingroup auxOTHERauxiliary

*

*> \par Contributors:

*  ==================

*>

*>     Ming Gu and Huan Ren, Computer Science Division, University of

*>     California at Berkeley, USA

*>

*  =====================================================================

      SUBROUTINE slasd2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
     $                   ldvt, dsigma, u2, ldu2, vt2, ldvt2, idxp, idx,
     $                   idxc, idxq, coltyp, info )
*
*  -- LAPACK auxiliary routine (version 3.4.2) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*     September 2012
*
*     Outline of the steps performed below:
*       1. Validate NL, NR, SQRE and the four leading dimensions.
*       2. Build the updating vector Z from ALPHA, BETA and VT, and
*          shift D(1:NL) one position down so that slot 1 is free.
*       3. Merge the two ordered sets held in D(2:N) into one order.
*       4. Deflate entries whose Z component is negligible or whose
*          values nearly coincide (Givens rotation on U and VT).
*       5. Group the vectors by column type, copy them into U2 / VT2,
*          and copy the deflated values and vectors back into the
*          tail of D, U and VT.
*     On return K holds the number of non-deflated entries and
*     COLTYP(1:4) holds the number of columns of each type.
*
*     .. Scalar Arguments ..
      INTEGER            info, k, ldu, ldu2, ldvt, ldvt2, nl, nr, sqre
      REAL               alpha, beta
*     ..
*     .. Array Arguments ..
      INTEGER            coltyp( * ), idx( * ), idxc( * ), idxp( * ),
     $                   idxq( * )
      REAL               d( * ), dsigma( * ), u( ldu, * ),
     $                   u2( ldu2, * ), vt( ldvt, * ), vt2( ldvt2, * ),
     $                   z( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      REAL               zero, one, two, eight
      parameter( zero = 0.0e+0, one = 1.0e+0, two = 2.0e+0,
     $                   eight = 8.0e+0 )
*     ..
*     .. Local Arrays ..
*     CTOT(T) = number of columns of type T; PSM(T) = next free
*     position for a column of type T while IDXC is being filled.
      INTEGER            ctot( 4 ), psm( 4 )
*     ..
*     .. Local Scalars ..
      INTEGER            ct, i, idxi, idxj, idxjp, j, jp, jprev, k2, m,
     $                   n, nlp1, nlp2
      REAL               c, eps, hlftol, s, tau, tol, z1
*     ..
*     .. External Functions ..
      REAL               slamch, slapy2
      EXTERNAL           slamch, slapy2
*     ..
*     .. External Subroutines ..
      EXTERNAL           scopy, slacpy, slamrg, slaset, srot, xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      info = 0
*
      IF( nl.LT.1 ) THEN
         info = -1
      ELSE IF( nr.LT.1 ) THEN
         info = -2
      ELSE IF( ( sqre.NE.1 ) .AND. ( sqre.NE.0 ) ) THEN
         info = -3
      END IF
*
      n = nl + nr + 1
      m = n + sqre
*
*     The leading-dimension tests compare against N and M, which are
*     only meaningful when NL, NR and SQRE are valid.  Run them only
*     if no earlier argument has been rejected, so that the error
*     reported to XERBLA is the first illegal argument and is not
*     overwritten by a later one.
*
      IF( info.EQ.0 ) THEN
         IF( ldu.LT.n ) THEN
            info = -10
         ELSE IF( ldvt.LT.m ) THEN
            info = -12
         ELSE IF( ldu2.LT.n ) THEN
            info = -15
         ELSE IF( ldvt2.LT.m ) THEN
            info = -17
         END IF
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SLASD2', -info )
         return
      END IF
*
      nlp1 = nl + 1
      nlp2 = nl + 2
*
*     Generate the first part of the vector Z; and move the singular
*     values in the first part of D one position backward.
*     The loop runs downward so that D(I) is read before D(I+1) is
*     overwritten.  IDXQ is shifted the same way and incremented so
*     that it keeps pointing at the moved entries of D.  Z1 is kept
*     separately because Z(1) is finalised only near the end.
*
      z1 = alpha*vt( nlp1, nlp1 )
      z( 1 ) = z1
      DO 10 i = nl, 1, -1
         z( i+1 ) = alpha*vt( i, nlp1 )
         d( i+1 ) = d( i )
         idxq( i+1 ) = idxq( i ) + 1
   10 continue
*
*     Generate the second part of the vector Z.
*     Note that this writes Z(NLP2:M), i.e. one entry past N when
*     SQRE = 1.
*
      DO 20 i = nlp2, m
         z( i ) = beta*vt( i, nlp2 )
   20 continue
*
*     Initialize some reference arrays.
*     Entries coming from the upper block start as type 1, entries
*     coming from the lower block as type 2.
*
      DO 30 i = 2, nlp1
         coltyp( i ) = 1
   30 continue
      DO 40 i = nlp2, n
         coltyp( i ) = 2
   40 continue
*
*     Sort the singular values into increasing order
*     First offset the second half of IDXQ by NL+1 so that its
*     entries index into the full array D.
*
      DO 50 i = nlp2, n
         idxq( i ) = idxq( i ) + nlp1
   50 continue
*
*     DSIGMA, IDXC, and the first column of U2
*     are used as storage space.
*     They receive D, COLTYP and Z gathered in IDXQ order.
*
      DO 60 i = 2, n
         dsigma( i ) = d( idxq( i ) )
         u2( i, 1 ) = z( idxq( i ) )
         idxc( i ) = coltyp( idxq( i ) )
   60 continue
*
*     SLAMRG (external) is called on the two lists of lengths NL and
*     NR stored back to back in DSIGMA(2:N), both with stride +1; it
*     is expected to return in IDX(2:N) the permutation that merges
*     them into one ascending list.
*
      CALL slamrg( nl, nr, dsigma( 2 ), 1, 1, idx( 2 ) )
*
*     Apply the merge permutation.  IDX is relative to DSIGMA(2),
*     hence the offset of one.
*
      DO 70 i = 2, n
         idxi = 1 + idx( i )
         d( i ) = dsigma( idxi )
         z( i ) = u2( idxi, 1 )
         coltyp( i ) = idxc( idxi )
   70 continue
*
*     Calculate the allowable deflation tolerance
*     TOL = 8 * eps * max( |D(N)|, |ALPHA|, |BETA| ).
*
      eps = slamch( 'Epsilon' )
      tol = max( abs( alpha ), abs( beta ) )
      tol = eight*eps*max( abs( d( n ) ), tol )
*
*     There are 2 kinds of deflation -- first a value in the z-vector
*     is small, second two (or more) singular values are very close
*     together (their difference is small).
*
*     If the value in the z-vector is small, we simply permute the
*     array so that the corresponding singular value is moved to the
*     end.
*
*     If two values in the D-vector are close, we perform a two-sided
*     rotation designed to make one of the corresponding z-vector
*     entries zero, and then permute the array so that the deflated
*     singular value is moved to the end.
*
*     If there are multiple singular values then the problem deflates.
*     Here the number of equal singular values are found.  As each equal
*     singular value is found, an elementary reflector is computed to
*     rotate the corresponding singular subspace so that the
*     corresponding components of Z are zero in this new basis.
*
*     K counts the retained (non-deflated) entries, slot 1 included.
*     K2 walks down from N+1, so IDXP(K2:N) lists the deflated
*     positions while IDXP(2:K) lists the retained ones.
*
      k = 1
      k2 = n + 1
*
*     Skip leading entries with a negligible Z component.  The first
*     entry that survives becomes JPREV.  If every entry up to J = N
*     is deflated nothing is left to compare, so jump directly to the
*     bookkeeping at label 120.
*
      DO 80 j = 2, n
         IF( abs( z( j ) ).LE.tol ) THEN
*
*           Deflate due to small z component.
*
            k2 = k2 - 1
            idxp( k2 ) = j
            coltyp( j ) = 4
            IF( j.EQ.n )
     $         go to 120
         ELSE
            jprev = j
            go to 90
         END IF
   80 continue
   90 continue
      j = jprev
*
*     Main scan: JPREV is the current retained candidate and J moves
*     forward over the remaining entries.
*
  100 continue
      j = j + 1
      IF( j.GT.n )
     $   go to 110
      IF( abs( z( j ) ).LE.tol ) THEN
*
*        Deflate due to small z component.
*
         k2 = k2 - 1
         idxp( k2 ) = j
         coltyp( j ) = 4
      ELSE
*
*        Check if singular values are close enough to allow deflation.
*
         IF( abs( d( j )-d( jprev ) ).LE.tol ) THEN
*
*           Deflation is possible.
*
            s = z( jprev )
            c = z( j )
*
*           Find sqrt(a**2+b**2) without overflow or
*           destructive underflow.
*
            tau = slapy2( c, s )
            c = c / tau
            s = -s / tau
*
*           The rotation moves the whole weight TAU into Z(J) and
*           leaves a zero in Z(JPREV), which is then deflated.
*
            z( j ) = tau
            z( jprev ) = zero
*
*           Apply back the Givens rotation to the left and right
*           singular vector matrices.
*           IDX and IDXQ map the sorted positions back to the stored
*           vectors.  Indices not exceeding NL+1 were increased by
*           one when D was shifted above, so that shift is undone.
*
            idxjp = idxq( idx( jprev )+1 )
            idxj = idxq( idx( j )+1 )
            IF( idxjp.LE.nlp1 ) THEN
               idxjp = idxjp - 1
            END IF
            IF( idxj.LE.nlp1 ) THEN
               idxj = idxj - 1
            END IF
            CALL srot( n, u( 1, idxjp ), 1, u( 1, idxj ), 1, c, s )
            CALL srot( m, vt( idxjp, 1 ), ldvt, vt( idxj, 1 ), ldvt, c,
     $                 s )
*
*           A rotation between vectors of different types produces a
*           dense vector (type 3); the zeroed one becomes type 4.
*
            IF( coltyp( j ).NE.coltyp( jprev ) ) THEN
               coltyp( j ) = 3
            END IF
            coltyp( jprev ) = 4
            k2 = k2 - 1
            idxp( k2 ) = jprev
            jprev = j
         ELSE
*
*           No deflation: JPREV is retained.  Its Z component is
*           parked in the first column of U2 until Z is rebuilt.
*
            k = k + 1
            u2( k, 1 ) = z( jprev )
            dsigma( k ) = d( jprev )
            idxp( k ) = jprev
            jprev = j
         END IF
      END IF
      go to 100
  110 continue
*
*     Record the last singular value.
*
      k = k + 1
      u2( k, 1 ) = z( jprev )
      dsigma( k ) = d( jprev )
      idxp( k ) = jprev
*
  120 continue
*
*     Count up the total number of the various types of columns, then
*     form a permutation which positions the four column types into
*     four groups of uniform structure (although one or more of these
*     groups may be empty).
*
      DO 130 j = 1, 4
         ctot( j ) = 0
  130 continue
      DO 140 j = 2, n
         ct = coltyp( j )
         ctot( ct ) = ctot( ct ) + 1
  140 continue
*
*     PSM(*) = Position in SubMatrix (of types 1 through 4)
*     Positions start at 2 because slot 1 is handled separately.
*
      psm( 1 ) = 2
      psm( 2 ) = 2 + ctot( 1 )
      psm( 3 ) = psm( 2 ) + ctot( 2 )
      psm( 4 ) = psm( 3 ) + ctot( 3 )
*
*     Fill out the IDXC array so that the permutation which it induces
*     will place all type-1 columns first, all type-2 columns next,
*     then all type-3's, and finally all type-4's, starting from the
*     second column. This applies similarly to the rows of VT.
*
      DO 150 j = 2, n
         jp = idxp( j )
         ct = coltyp( jp )
         idxc( psm( ct ) ) = j
         psm( ct ) = psm( ct ) + 1
  150 continue
*
*     Sort the singular values and corresponding singular vectors into
*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
*     which were not deflated go into the first K slots of DSIGMA, U2,
*     and VT2 respectively, while those which were deflated go into the
*     last N - K slots, except that the first column/row will be treated
*     separately.
*     The values follow the IDXP order, while the vectors follow the
*     type-grouped order given by IDXC composed with IDXP.
*
      DO 160 j = 2, n
         jp = idxp( j )
         dsigma( j ) = d( jp )
         idxj = idxq( idx( idxp( idxc( j ) ) )+1 )
         IF( idxj.LE.nlp1 ) THEN
            idxj = idxj - 1
         END IF
         CALL scopy( n, u( 1, idxj ), 1, u2( 1, j ), 1 )
         CALL scopy( m, vt( idxj, 1 ), ldvt, vt2( j, 1 ), ldvt2 )
  160 continue
*
*     Determine DSIGMA(1), DSIGMA(2) and Z(1)
*     DSIGMA(2) is kept at least TOL/2 in magnitude and Z(1) at least
*     TOL (presumably to keep the secular equation well separated
*     from the zero in DSIGMA(1) -- confirm against SLASD3/SLASD4).
*     When M > N the entries Z1 and Z(M) are combined into Z(1) and
*     (C,S) is the rotation applied to rows NL+1 and M of VT below.
*
      dsigma( 1 ) = zero
      hlftol = tol / two
      IF( abs( dsigma( 2 ) ).LE.hlftol )
     $   dsigma( 2 ) = hlftol
      IF( m.GT.n ) THEN
         z( 1 ) = slapy2( z1, z( m ) )
         IF( z( 1 ).LE.tol ) THEN
            c = one
            s = zero
            z( 1 ) = tol
         ELSE
            c = z1 / z( 1 )
            s = z( m ) / z( 1 )
         END IF
      ELSE
         IF( abs( z1 ).LE.tol ) THEN
            z( 1 ) = tol
         ELSE
            z( 1 ) = z1
         END IF
      END IF
*
*     Move the rest of the updating row to Z.
*     (The retained components were parked in U2(2:K,1) above.)
*
      CALL scopy( k-1, u2( 2, 1 ), 1, z( 2 ), 1 )
*
*     Determine the first column of U2, the first row of VT2 and the
*     last row of VT.
*     The first column of U2 becomes the unit vector with its one in
*     row NL+1.
*
      CALL slaset( 'A', n, 1, zero, zero, u2, ldu2 )
      u2( nlp1, 1 ) = one
      IF( m.GT.n ) THEN
         DO 170 i = 1, nlp1
            vt( m, i ) = -s*vt( nlp1, i )
            vt2( 1, i ) = c*vt( nlp1, i )
  170    continue
*
*        VT2(1,I) must be formed before VT(M,I) is rescaled, since
*        it uses the old value of VT(M,I).
*
         DO 180 i = nlp2, m
            vt2( 1, i ) = s*vt( m, i )
            vt( m, i ) = c*vt( m, i )
  180    continue
      ELSE
         CALL scopy( m, vt( nlp1, 1 ), ldvt, vt2( 1, 1 ), ldvt2 )
      END IF
      IF( m.GT.n ) THEN
         CALL scopy( m, vt( m, 1 ), ldvt, vt2( m, 1 ), ldvt2 )
      END IF
*
*     The deflated singular values and their corresponding vectors go
*     into the back of D, U, and V respectively.
*
      IF( n.GT.k ) THEN
         CALL scopy( n-k, dsigma( k+1 ), 1, d( k+1 ), 1 )
         CALL slacpy( 'A', n, n-k, u2( 1, k+1 ), ldu2, u( 1, k+1 ),
     $                ldu )
         CALL slacpy( 'A', n-k, m, vt2( k+1, 1 ), ldvt2, vt( k+1, 1 ),
     $                ldvt )
      END IF
*
*     Copy CTOT into COLTYP for referencing in SLASD3.
*
      DO 190 j = 1, 4
         coltyp( j ) = ctot( j )
  190 continue
*
      return
*
*     End of SLASD2
*
      END