d2/dbd/chetrf__aa_8f_source.html

*> \brief \b CHETRF_AA

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> Download CHETRF_AA + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aa.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aa.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aa.f">

*> [TXT]</a>

*

*  Definition:

*  ===========

*

*       SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )

*

*       .. Scalar Arguments ..

*       CHARACTER    UPLO

*       INTEGER      N, LDA, LWORK, INFO

*       ..

*       .. Array Arguments ..

*       INTEGER      IPIV( * )

*       COMPLEX      A( LDA, * ), WORK( * )

*       ..

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> CHETRF_AA computes the factorization of a complex Hermitian matrix A

*> using Aasen's algorithm.  The form of the factorization is

*>

*>    A = U**H*T*U  or  A = L*T*L**H

*>

*> where U (or L) is a product of permutation and unit upper (lower)

*> triangular matrices, and T is a Hermitian tridiagonal matrix.

*>

*> This is the blocked version of the algorithm, calling Level 3 BLAS.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] UPLO

*> \verbatim

*>          UPLO is CHARACTER*1

*>          = 'U':  Upper triangle of A is stored;

*>          = 'L':  Lower triangle of A is stored.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The order of the matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is COMPLEX array, dimension (LDA,N)

*>          On entry, the hermitian matrix A.  If UPLO = 'U', the leading

*>          N-by-N upper triangular part of A contains the upper

*>          triangular part of the matrix A, and the strictly lower

*>          triangular part of A is not referenced.  If UPLO = 'L', the

*>          leading N-by-N lower triangular part of A contains the lower

*>          triangular part of the matrix A, and the strictly upper

*>          triangular part of A is not referenced.

*>

*>          On exit, the tridiagonal matrix T is stored on the diagonal

*>          and on the first subdiagonal (if UPLO = 'L') or first

*>          superdiagonal (if UPLO = 'U') of A, and the factor L (or U)

*>          is stored below the subdiagonal (or above the superdiagonal).

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,N).

*> \endverbatim

*>

*> \param[out] IPIV

*> \verbatim

*>          IPIV is INTEGER array, dimension (N)

*>          On exit, it contains the details of the interchanges, i.e.,

*>          the row and column k of A were interchanged with the

*>          row and column IPIV(k).

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is COMPLEX array, dimension (MAX(1,LWORK))

*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

*> \endverbatim

*>

*> \param[in] LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The length of WORK.

*>          LWORK >= 1, if N <= 1, and LWORK >= 2*N, otherwise.

*>          For optimum performance LWORK >= N*(1+NB), where NB is

*>          the optimal blocksize, returned by ILAENV.

*>

*>          If LWORK = -1, then a workspace query is assumed; the routine

*>          only calculates the optimal size of the WORK array, returns

*>          this value as the first entry of the WORK array, and no error

*>          message related to LWORK is issued by XERBLA.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit

*>          < 0:  if INFO = -i, the i-th argument had an illegal value.

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \ingroup hetrf_aa

*

*  =====================================================================


      SUBROUTINE chetrf_aa( UPLO, N, A, LDA, IPIV,
     $                      WORK, LWORK, INFO )
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*  Purpose
*  -------
*  Blocked Aasen factorization of the N-by-N complex Hermitian matrix
*  A:  A = U**H*T*U (UPLO = 'U') or A = L*T*L**H (UPLO = 'L'), with T
*  Hermitian tridiagonal.  Each pass of the main loop factorizes one
*  panel with CLAHEF_AA and then updates the trailing submatrix with
*  CGEMM.
*
*  Arguments
*  ---------
*  UPLO   (in)     'U' or 'L': which triangle of A is stored.
*  N      (in)     order of A, N >= 0.
*  A      (in/out) COMPLEX array (LDA,*); overwritten by the factors.
*  LDA    (in)     leading dimension of A, LDA >= max(1,N).
*  IPIV   (out)    interchange indices; IPIV(1) is always set to 1.
*  WORK   (out)    workspace; WORK(1) returns the optimal LWORK.
*  LWORK  (in)     length of WORK, or -1 for a workspace query.
*  INFO   (out)    0 on success, -i if argument i is illegal
*                  (this routine reports -1, -2, -4 and -7).
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER    UPLO
      INTEGER      N, LDA, LWORK, INFO
*     ..
*     .. Array Arguments ..
      INTEGER      IPIV( * )
      COMPLEX      A( LDA, * ), WORK( * )
*     ..
*
*  =====================================================================
*     .. Parameters ..
*     NOTE(review): ZERO is declared but not referenced in this routine.
      COMPLEX      ZERO, ONE
      parameter( zero = (0.0e+0, 0.0e+0), one = (1.0e+0, 0.0e+0) )
*
*     .. Local Scalars ..
*     LQUERY : .TRUE. when LWORK = -1 (workspace query)
*     UPPER  : .TRUE. when UPLO = 'U'
*     J      : last column of the previous panel (0 before the first)
*     J1, JB : first column and width of the current panel
*     K1, K2 : first-panel flags, (K1,K2) = (1,0) for the first panel
*              and (0,1) for every later panel
*     J2, J3, NJ, MJ : indices/sizes of the trailing-update blocks
*     ALPHA  : saved conjugate of the last off-diagonal entry of T
      LOGICAL      LQUERY, UPPER
      INTEGER      J, LWKMIN, LWKOPT
      INTEGER      NB, MJ, NJ, K1, K2, J1, J2, J3, JB
      COMPLEX      ALPHA
*     ..
*     .. External Functions ..
      LOGICAL      LSAME
      INTEGER      ILAENV
      REAL         SROUNDUP_LWORK
      EXTERNAL     lsame, ilaenv, sroundup_lwork
*     ..
*     .. External Subroutines ..
      EXTERNAL     clahef_aa, cgemm, ccopy, cswap, cscal,
     $                   xerbla
*     ..
*     .. Intrinsic Functions ..
*     NOTE(review): MIN is also used below but is not listed here.
      INTRINSIC    real, conjg, max
*     ..
*     .. Executable Statements ..
*
*     Determine the block size
*
      nb = ilaenv( 1, 'CHETRF_AA', uplo, n, -1, -1, -1 )
*
*     Test the input parameters.
*
      info = 0
      upper = lsame( uplo, 'U' )
      lquery = ( lwork.EQ.-1 )
*
*     Minimal and optimal workspace sizes: 1 for N <= 1, otherwise
*     2*N (minimal) and (NB+1)*N (optimal).
*
      IF( n.LE.1 ) THEN
         lwkmin = 1
         lwkopt = 1
      ELSE
         lwkmin = 2*n
         lwkopt = (nb+1)*n
      END IF
*
*     Argument checks; INFO is minus the position of the first
*     offending argument.  A too-small LWORK is not an error during
*     a workspace query.
*
      IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -4
      ELSE IF( lwork.LT.lwkmin .AND. .NOT.lquery ) THEN
         info = -7
      END IF
*
*     Report the optimal workspace size in WORK(1)
*
      IF( info.EQ.0 ) THEN
         work( 1 ) = sroundup_lwork( lwkopt )
      END IF
*
*     Illegal argument: report through XERBLA.  Workspace query:
*     return with only WORK(1) set.
*
      IF( info.NE.0 ) THEN
         CALL xerbla( 'CHETRF_AA', -info )
         RETURN
      ELSE IF( lquery ) THEN
         RETURN
      END IF
*
*     Quick return
*
      IF( n.EQ.0 ) THEN
          RETURN
      ENDIF
      ipiv( 1 ) = 1
*
*     N = 1: only the imaginary part of the single diagonal entry is
*     discarded.
*
      IF( n.EQ.1 ) THEN
         a( 1, 1 ) = real( a( 1, 1 ) )
         RETURN
      END IF
*
*     Adjust block size based on the workspace size, so that
*     (NB+1)*N does not exceed LWORK
*
      IF( lwork.LT.((1+nb)*n) ) THEN
         nb = ( lwork-n ) / n
      END IF
*
      IF( upper ) THEN
*
*        .....................................................
*        Factorize A as U**H*T*U using the upper triangle of A
*        .....................................................
*
*        copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
*
         CALL ccopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
*
*        J is the main loop index, increasing from 0 to N in steps of
*        JB, where JB is the number of columns factorized by
*        CLAHEF_AA; JB is either NB, or N-J1+1 for the last block
*
         j = 0
 10      CONTINUE
         IF( j.GE.n )
     $      GO TO 20
*
*        each step of the main loop
*         J is the last column of the previous panel
*         J1 is the first column of the current panel
*         K1 identifies if the previous column of the panel has been
*          explicitly stored, e.g., K1=1 for the first panel, and
*          K1=0 for the rest
*
         j1 = j + 1
         jb = min( n-j1+1, nb )
         k1 = max(1, j)-j
*
*        Panel factorization.  WORK(1:N*NB) is passed as the H array
*        with leading dimension N, and WORK(N*NB+1:) as the panel
*        routine's own workspace.
*
         CALL clahef_aa( uplo, 2-k1, n-j, jb,
     $                      a( max(1, j), j+1 ), lda,
     $                      ipiv( j+1 ), work, n, work( n*nb+1 ) )
*
*        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
*        The offset J is added to each pivot index (presumably
*        CLAHEF_AA returns panel-relative indices -- confirm), and the
*        interchange is applied to the first J1-K1-2 rows of the two
*        columns involved.
*
         DO j2 = j+2, min(n, j+jb+1)
            ipiv( j2 ) = ipiv( j2 ) + j
            IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
               CALL cswap( j1-k1-2, a( 1, j2 ), 1,
     $                              a( 1, ipiv(j2) ), 1 )
            END IF
         END DO
*
*        From here on J is the last column of the current panel
*
         j = j + jb
*
*        Trailing submatrix update, where
*         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
*         WORK stores the current block of the auxiliary matrix H
*
         IF( j.LT.n ) THEN
*
*          if the first panel and JB=1 (NB=1), then nothing to do
*
            IF( j1.GT.1 .OR. jb.GT.1 ) THEN
*
*              Merge rank-1 update with BLAS-3 update
*
*              Save conjg(T(J,J+1)) in ALPHA and temporarily set the
*              entry to ONE; row J-1 of A (columns J+1:N), scaled by
*              ALPHA, is appended to WORK as one more column of H.
*
               alpha = conjg( a( j, j+1 ) )
               a( j, j+1 ) = one
               CALL ccopy( n-j, a( j-1, j+1 ), lda,
     $                          work( (j+1-j1+1)+jb*n ), 1 )
               CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
*
*              K1 identifies if the previous column of the panel has been
*               explicitly stored, e.g., K1=1 and K2=0 for the first panel,
*               and K1=0 and K2=1 for the rest
*
               IF( j1.GT.1 ) THEN
*
*                 Not first panel
*
                  k2 = 1
               ELSE
*
*                 First panel
*
                  k2 = 0
*
*                 First update skips the first column
*
                  jb = jb - 1
               END IF
*
*              Sweep the trailing matrix in block rows of at most NB
*              rows, starting at row J+1
*
               DO j2 = j+1, n, nb
                  nj = min( nb, n-j2+1 )
*
*                 Update (J2, J2) diagonal block one row at a time:
*                 each CGEMM call has M = 1 and updates the MJ entries
*                 of row J3 starting at A(J3, J3)
*
                  j3 = j2
                  DO mj = nj-1, 1, -1
                     CALL cgemm( 'Conjugate transpose', 'Transpose',
     $                            1, mj, jb+1,
     $                           -one, a( j1-k2, j3 ), lda,
     $                                 work( (j3-j1+1)+k1*n ), n,
     $                            one, a( j3, j3 ), lda )
                     j3 = j3 + 1
                  END DO
*
*                 Update off-diagonal block of J2-th block row with CGEMM
*                 (J3 = J2+NJ-1 here, so columns J3:N of rows
*                 J2:J2+NJ-1 are updated)
*
                  CALL cgemm( 'Conjugate transpose', 'Transpose',
     $                        nj, n-j3+1, jb+1,
     $                       -one, a( j1-k2, j2 ), lda,
     $                             work( (j3-j1+1)+k1*n ), n,
     $                        one, a( j2, j3 ), lda )
               END DO
*
*              Recover T( J, J+1 )
*
               a( j, j+1 ) = conjg( alpha )
            END IF
*
*           WORK(J+1, 1) stores H(J+1, 1)
*           (row J+1 of A, from the diagonal on, is copied into
*           WORK(1:N-J) for the next panel)
*
            CALL ccopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
         END IF
         GO TO 10
      ELSE
*
*        .....................................................
*        Factorize A as L*T*L**H using the lower triangle of A
*        .....................................................
*
*        copy first column A(1:N, 1) into H(1:N, 1)
*         (stored in WORK(1:N))
*
         CALL ccopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
*
*        J is the main loop index, increasing from 0 to N in steps of
*        JB, where JB is the number of columns factorized by
*        CLAHEF_AA; JB is either NB, or N-J1+1 for the last block
*
         j = 0
 11      CONTINUE
         IF( j.GE.n )
     $      GO TO 20
*
*        each step of the main loop
*         J is the last column of the previous panel
*         J1 is the first column of the current panel
*         K1 identifies if the previous column of the panel has been
*          explicitly stored, e.g., K1=1 for the first panel, and
*          K1=0 for the rest
*
         j1 = j+1
         jb = min( n-j1+1, nb )
         k1 = max(1, j)-j
*
*        Panel factorization.  WORK(1:N*NB) is passed as the H array
*        with leading dimension N, and WORK(N*NB+1:) as the panel
*        routine's own workspace.
*
         CALL clahef_aa( uplo, 2-k1, n-j, jb,
     $                      a( j+1, max(1, j) ), lda,
     $                      ipiv( j+1 ), work, n, work( n*nb+1 ) )
*
*        Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
*        The offset J is added to each pivot index (presumably
*        CLAHEF_AA returns panel-relative indices -- confirm), and the
*        interchange is applied to the first J1-K1-2 columns of the
*        two rows involved.
*
         DO j2 = j+2, min(n, j+jb+1)
            ipiv( j2 ) = ipiv( j2 ) + j
            IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
               CALL cswap( j1-k1-2, a( j2, 1 ), lda,
     $                              a( ipiv(j2), 1 ), lda )
            END IF
         END DO
*
*        From here on J is the last column of the current panel
*
         j = j + jb
*
*        Trailing submatrix update, where
*          A(J2+1, J1-1) stores L(J2+1, J1) and
*          WORK(J2+1, 1) stores H(J2+1, 1)
*
         IF( j.LT.n ) THEN
*
*          if the first panel and JB=1 (NB=1), then nothing to do
*
            IF( j1.GT.1 .OR. jb.GT.1 ) THEN
*
*              Merge rank-1 update with BLAS-3 update
*
*              Save conjg(T(J+1,J)) in ALPHA and temporarily set the
*              entry to ONE; column J-1 of A (rows J+1:N), scaled by
*              ALPHA, is appended to WORK as one more column of H.
*
               alpha = conjg( a( j+1, j ) )
               a( j+1, j ) = one
               CALL ccopy( n-j, a( j+1, j-1 ), 1,
     $                          work( (j+1-j1+1)+jb*n ), 1 )
               CALL cscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
*
*              K1 identifies if the previous column of the panel has been
*               explicitly stored, e.g., K1=1 and K2=0 for the first panel,
*               and K1=0 and K2=1 for the rest
*
               IF( j1.GT.1 ) THEN
*
*                 Not first panel
*
                  k2 = 1
               ELSE
*
*                 First panel
*
                  k2 = 0
*
*                 First update skips the first column
*
                  jb = jb - 1
               END IF
*
*              Sweep the trailing matrix in block columns of at most
*              NB columns, starting at column J+1
*
               DO j2 = j+1, n, nb
                  nj = min( nb, n-j2+1 )
*
*                 Update (J2, J2) diagonal block one column at a time:
*                 each CGEMM call has N = 1 and updates the MJ entries
*                 of column J3 starting at A(J3, J3)
*
                  j3 = j2
                  DO mj = nj-1, 1, -1
                     CALL cgemm( 'No transpose',
     $                           'Conjugate transpose',
     $                           mj, 1, jb+1,
     $                          -one, work( (j3-j1+1)+k1*n ), n,
     $                                a( j3, j1-k2 ), lda,
     $                           one, a( j3, j3 ), lda )
                     j3 = j3 + 1
                  END DO
*
*                 Update off-diagonal block of J2-th block column with CGEMM
*                 (J3 = J2+NJ-1 here, so rows J3:N of columns
*                 J2:J2+NJ-1 are updated)
*
                  CALL cgemm( 'No transpose', 'Conjugate transpose',
     $                        n-j3+1, nj, jb+1,
     $                       -one, work( (j3-j1+1)+k1*n ), n,
     $                             a( j2, j1-k2 ), lda,
     $                        one, a( j3, j2 ), lda )
               END DO
*
*              Recover T( J+1, J )
*
               a( j+1, j ) = conjg( alpha )
            END IF
*
*           WORK(J+1, 1) stores H(J+1, 1)
*           (column J+1 of A, from the diagonal on, is copied into
*           WORK(1:N-J) for the next panel)
*
            CALL ccopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
         END IF
         GO TO 11
      END IF
*
*     Common exit of both branches: WORK(1) again reports the optimal
*     workspace size, since WORK(1:N) was used as scratch above.
*
   20 CONTINUE
      work( 1 ) = sroundup_lwork( lwkopt )
      RETURN
*
*     End of CHETRF_AA
*
      END

xerbla
subroutine xerbla(srname, info)
Definition cblat2.f:3285

ccopy
subroutine ccopy(n, cx, incx, cy, incy)
CCOPY
Definition ccopy.f:81

cgemm
subroutine cgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
CGEMM
Definition cgemm.f:188

chetrf_aa
subroutine chetrf_aa(uplo, n, a, lda, ipiv, work, lwork, info)
CHETRF_AA
Definition chetrf_aa.f:133

clahef_aa
subroutine clahef_aa(uplo, j1, m, nb, a, lda, ipiv, h, ldh, work)
CLAHEF_AA
Definition clahef_aa.f:142

cscal
subroutine cscal(n, ca, cx, incx)
CSCAL
Definition cscal.f:78

cswap
subroutine cswap(n, cx, incx, cy, incy)
CSWAP
Definition cswap.f:81