d6/da2/zhetrd__he2hb_8f_source.html

*> \brief \b ZHETRD_HE2HB

*

*  @precisions fortran z -> s d c

*

*  =========== DOCUMENTATION ===========

*

* Online html documentation available at

*            http://www.netlib.org/lapack/explore-html/

*

*> Download ZHETRD_HE2HB + dependencies

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f">

*> [TGZ]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f">

*> [ZIP]</a>

*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f">

*> [TXT]</a>

*

*  Definition:

*  ===========

*

*       SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,

*                              WORK, LWORK, INFO )

*

*       IMPLICIT NONE

*

*       .. Scalar Arguments ..

*       CHARACTER          UPLO

*       INTEGER            INFO, LDA, LDAB, LWORK, N, KD

*       ..

*       .. Array Arguments ..

*       COMPLEX*16         A( LDA, * ), AB( LDAB, * ),

*                          TAU( * ), WORK( * )

*       ..

*

*

*> \par Purpose:

*  =============

*>

*> \verbatim

*>

*> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian

*> band-diagonal form AB by a unitary similarity transformation:

*> Q**H * A * Q = AB.

*> \endverbatim

*

*  Arguments:

*  ==========

*

*> \param[in] UPLO

*> \verbatim

*>          UPLO is CHARACTER*1

*>          = 'U':  Upper triangle of A is stored;

*>          = 'L':  Lower triangle of A is stored.

*> \endverbatim

*>

*> \param[in] N

*> \verbatim

*>          N is INTEGER

*>          The order of the matrix A.  N >= 0.

*> \endverbatim

*>

*> \param[in] KD

*> \verbatim

*>          KD is INTEGER

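*>          The number of superdiagonals of the reduced matrix if UPLO = 'U',

*>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.

*>          If N > 1, KD must be at least 1, because the reduction advances

*>          through A in panels of KD rows or columns.

*>          The reduced matrix is stored in the array AB.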

*> \endverbatim

*>

*> \param[in,out] A

*> \verbatim

*>          A is COMPLEX*16 array, dimension (LDA,N)

*>          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading

*>          N-by-N upper triangular part of A contains the upper

*>          triangular part of the matrix A, and the strictly lower

*>          triangular part of A is not referenced.  If UPLO = 'L', the

*>          leading N-by-N lower triangular part of A contains the lower

*>          triangular part of the matrix A, and the strictly upper

*>          triangular part of A is not referenced.

*>          On exit, A is overwritten and the reduced band matrix is

*>          returned in AB.  If UPLO = 'U', the elements of A above the

*>          KD-th superdiagonal, with the array TAU, represent the

*>          unitary matrix Q as a product of elementary reflectors; if

*>          UPLO = 'L', the elements of A below the KD-th subdiagonal,

*>          with the array TAU, represent the unitary matrix Q as a

*>          product of elementary reflectors. See Further Details.

*> \endverbatim

*>

*> \param[in] LDA

*> \verbatim

*>          LDA is INTEGER

*>          The leading dimension of the array A.  LDA >= max(1,N).

*> \endverbatim

*>

*> \param[out] AB

*> \verbatim

*>          AB is COMPLEX*16 array, dimension (LDAB,N)

*>          On exit, the upper or lower triangle of the Hermitian band

*>          matrix A, stored in the first KD+1 rows of the array.  The

*>          j-th column of A is stored in the j-th column of the array AB

*>          as follows:

*>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;

*>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).

*> \endverbatim

*>

*> \param[in] LDAB

*> \verbatim

*>          LDAB is INTEGER

*>          The leading dimension of the array AB.  LDAB >= KD+1.

*> \endverbatim

*>

*> \param[out] TAU

*> \verbatim

*>          TAU is COMPLEX*16 array, dimension (N-KD)

*>          The scalar factors of the elementary reflectors (see Further

*>          Details).

*> \endverbatim

*>

*> \param[out] WORK

*> \verbatim

*>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))

*>          On exit, if INFO = 0, or if LWORK = -1,

*>          WORK(1) returns the size of LWORK.

*> \endverbatim

*>

*> \param[in] LWORK

*> \verbatim

*>          LWORK is INTEGER

*>          The dimension of the array WORK which should be calculated

*>          by a workspace query.

*>          If N <= KD+1, LWORK >= 1, else LWORK = MAX(1, LWORK_QUERY).

*>

*>          If LWORK = -1, then a workspace query is assumed; the routine

*>          only calculates the optimal size of the WORK array, returns

*>          this value as the first entry of the WORK array, and no error

*>          message related to LWORK is issued by XERBLA.

*>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD

*>          where FACTOPTNB is the blocking used by the QR or LQ

*>          algorithm, usually FACTOPTNB=128 is a good choice otherwise

*>          putting LWORK=-1 will provide the size of WORK.

*> \endverbatim

*>

*> \param[out] INFO

*> \verbatim

*>          INFO is INTEGER

*>          = 0:  successful exit

*>          < 0:  if INFO = -i, the i-th argument had an illegal value

*> \endverbatim

*

*  Authors:

*  ========

*

*> \author Univ. of Tennessee

*> \author Univ. of California Berkeley

*> \author Univ. of Colorado Denver

*> \author NAG Ltd.

*

*> \ingroup hetrd_he2hb

*

*> \par Further Details:

*  =====================

*>

*> \verbatim

*>

*>  Implemented by Azzam Haidar.

*>

*>  All details are available on technical report, SC11, SC13 papers.

*>

*>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.

*>  Parallel reduction to condensed forms for symmetric eigenvalue problems

*>  using aggregated fine-grained and memory-aware kernels. In Proceedings

*>  of 2011 International Conference for High Performance Computing,

*>  Networking, Storage and Analysis (SC '11), New York, NY, USA,

*>  Article 8 , 11 pages.

*>  http://doi.acm.org/10.1145/2063384.2063394

*>

*>  A. Haidar, J. Kurzak, P. Luszczek, 2013.

*>  An improved parallel singular value algorithm and its implementation

*>  for multicore hardware, In Proceedings of 2013 International Conference

*>  for High Performance Computing, Networking, Storage and Analysis (SC '13).

*>  Denver, Colorado, USA, 2013.

*>  Article 90, 12 pages.

*>  http://doi.acm.org/10.1145/2503210.2503292

*>

*>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.

*>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure

*>  calculations based on fine-grained memory aware tasks.

*>  International Journal of High Performance Computing Applications.

*>  Volume 28 Issue 2, Pages 196-209, May 2014.

*>  http://hpc.sagepub.com/content/28/2/196

*>

*> \endverbatim

*>

*> \verbatim

*>

*>  If UPLO = 'U', the matrix Q is represented as a product of elementary

*>  reflectors

*>

*>     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.

*>

*>  Each H(i) has the form

*>

*>     H(i) = I - tau * v * v**H

*>

*>  where tau is a complex scalar, and v is a complex vector with

*>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in

*>  A(i,i+kd+1:n), and tau in TAU(i).

*>

*>  If UPLO = 'L', the matrix Q is represented as a product of elementary

*>  reflectors

*>

*>     Q = H(1) H(2) . . . H(k), where k = n-kd.

*>

*>  Each H(i) has the form

*>

*>     H(i) = I - tau * v * v**H

*>

*>  where tau is a complex scalar, and v is a complex vector with

*>  v(1:i+kd-1) = 0 and v(i+kd) = 1; v(i+kd+1:n) is stored on exit in

*>  A(i+kd+1:n,i), and tau in TAU(i).

*>

*>  The contents of A on exit are illustrated by the following examples

*>  with n = 5:

*>

*>  if UPLO = 'U':                       if UPLO = 'L':

*>

*>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )

*>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )

*>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )

*>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )

*>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )

*>

*>  where ab denotes an element of the reduced band matrix AB, and vi

*>  denotes an element of the vector defining H(i).

*> \endverbatim

*>

*  =====================================================================


      SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
     $                         WORK, LWORK, INFO )
*
*     Reduces the N-by-N complex Hermitian matrix A to Hermitian band
*     form with KD off-diagonals by a unitary similarity
*     transformation, Q**H * A * Q = AB.  A is processed in panels of
*     KD rows (UPLO = 'U', LQ factorization) or KD columns
*     (UPLO = 'L', QR factorization); after each panel the trailing
*     submatrix receives a Hermitian rank-2k update.
*
*     Arguments
*       UPLO  (in)     'U': the upper triangle of A is used;
*                      'L': the lower triangle of A is used.
*       N     (in)     Order of A.  N >= 0.
*       KD    (in)     Number of off-diagonals of the band matrix.
*                      KD >= 0, and KD >= 1 when N > 1.
*       A     (in/out) Hermitian matrix, dimension (LDA,*).  It is
*                      overwritten: on exit each panel holds the
*                      reflector vectors returned by ZGELQF/ZGEQRF,
*                      with the leading PK-by-PK triangle of the
*                      panel reset by ZLASET (zero off the diagonal,
*                      one on it).
*       LDA   (in)     Leading dimension of A.  LDA >= max(1,N).
*       AB    (out)    Band matrix in band storage, dimension
*                      (LDAB,*): AB(KD+1+i-j,j) = A(i,j) for
*                      UPLO = 'U', AB(1+i-j,j) = A(i,j) for
*                      UPLO = 'L'.
*       LDAB  (in)     Leading dimension of AB.  LDAB >= KD+1.
*       TAU   (out)    Scalar factors of the elementary reflectors.
*       WORK  (out)    Workspace.  On exit with INFO = 0, or after a
*                      workspace query, WORK(1) holds the required
*                      LWORK.
*       LWORK (in)     Dimension of WORK.  At least 1 if N <= KD+1,
*                      otherwise at least the value returned by
*                      ILAENV2STAGE( 4, 'ZHETRD_HE2HB', ... ).
*                      LWORK = -1 requests a workspace query only.
*       INFO  (out)    = 0: successful exit;
*                      < 0: if INFO = -i, argument i is illegal.
*
      IMPLICIT NONE
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            INFO, LDA, LDAB, LWORK, N, KD
*     ..
*     .. Array Arguments ..
      COMPLEX*16         A( LDA, * ), AB( LDAB, * ),
     $                   TAU( * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Parameters ..
      DOUBLE PRECISION   RONE
      COMPLEX*16         ZERO, ONE, HALF
      PARAMETER          ( RONE = 1.0D+0,
     $                   ZERO = ( 0.0D+0, 0.0D+0 ),
     $                   ONE = ( 1.0D+0, 0.0D+0 ),
     $                   HALF = ( 0.5D+0, 0.0D+0 ) )
*     ..
*     .. Local Scalars ..
      LOGICAL            LQUERY, UPPER
      INTEGER            I, J, IINFO, LWMIN, PN, PK, LK,
     $                   LDT, LDW, LDS2, LDS1,
     $                   LS2, LS1, LW, LT,
     $                   TPOS, WPOS, S2POS, S1POS
*     ..
*     .. External Subroutines ..
      EXTERNAL           XERBLA, ZHER2K, ZHEMM, ZGEMM,
     $                   ZCOPY,
     $                   ZLARFT, ZGELQF, ZGEQRF, ZLASET
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MIN, MAX
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV2STAGE
      EXTERNAL           LSAME, ILAENV2STAGE
*     ..
*     .. Executable Statements ..
*
*     Determine the minimal workspace size required
*     and test the input parameters
*
      INFO   = 0
      UPPER  = LSAME( UPLO, 'U' )
      LQUERY = ( LWORK.EQ.-1 )
      IF( N.LE.KD+1 ) THEN
         LWMIN = 1
      ELSE
         LWMIN = ILAENV2STAGE( 4, 'ZHETRD_HE2HB', '', N, KD, -1, -1 )
      END IF
*
*     KD = 0 is accepted only when N <= 1 (quick return below).  For
*     N > 1 it would make the panel loops run with a zero increment,
*     which Fortran does not permit, and would pass LDT = 0 to the
*     BLAS, so it is reported as an illegal value of argument 3.
*
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( KD.LT.0 .OR. ( KD.EQ.0 .AND. N.GT.1 ) ) THEN
         INFO = -3
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -5
      ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
         INFO = -7
      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
         INFO = -10
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'ZHETRD_HE2HB', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         WORK( 1 ) = LWMIN
         RETURN
      END IF
*
*     Quick return if possible: A already fits inside the band, so
*     copy the upper/lower portion of A into AB column by column
*
      IF( N.LE.KD+1 ) THEN
         IF( UPPER ) THEN
            DO 100 I = 1, N
               LK = MIN( KD+1, I )
               CALL ZCOPY( LK, A( I-LK+1, I ), 1,
     $                     AB( KD+1-LK+1, I ), 1 )
  100       CONTINUE
         ELSE
            DO 110 I = 1, N
               LK = MIN( KD+1, N-I+1 )
               CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
  110       CONTINUE
         END IF
         WORK( 1 ) = 1
         RETURN
      END IF
*
*     Determine the pointer position for the workspace.  WORK is
*     split into four consecutive pieces:
*        WORK( TPOS )  : T,  LDT-by-KD   (triangular factor)
*        WORK( WPOS )  : W,  N*KD elements
*        WORK( S1POS ) : S1, LDS1-by-KD
*        WORK( S2POS ) : S2, the remaining LS2 elements; also used
*                        as scratch space by ZGELQF/ZGEQRF
*
      LDT    = KD
      LDS1   = KD
      LT     = LDT*KD
      LW     = N*KD
      LS1    = LDS1*KD
      LS2    = LWMIN - LT - LW - LS1
*      LS2 = N*MAX(KD,FACTOPTNB)
      TPOS   = 1
      WPOS   = TPOS  + LT
      S1POS  = WPOS  + LW
      S2POS  = S1POS + LS1
      IF( UPPER ) THEN
         LDW    = KD
         LDS2   = KD
      ELSE
         LDW    = N
         LDS2   = N
      END IF
*
*     Set the workspace of the triangular matrix T to zero once such a
*     way every time T is generated the upper/lower portion will be
*     always zero
*
      CALL ZLASET( 'A', LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
*
      IF( UPPER ) THEN
*
*        Reduce the upper triangle of A to upper band matrix
*
         DO 10 I = 1, N - KD, KD
            PN = N-I-KD+1
            PK = MIN( N-I-KD+1, KD )
*
*           Compute the LQ factorization of the current block
*
            CALL ZGELQF( KD, PN, A( I, I+KD ), LDA,
     $                   TAU( I ), WORK( S2POS ), LS2, IINFO )
*
*           Copy the upper portion of A into AB.  This has to happen
*           before ZLASET below overwrites the triangular factor.
*
            DO 20 J = I, I+PK-1
               LK = MIN( KD, N-J ) + 1
               CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ),
     $                     LDAB-1 )
   20       CONTINUE
*
*           Make the leading triangle of V explicit
*
            CALL ZLASET( 'Lower', PK, PK, ZERO, ONE,
     $                   A( I, I+KD ), LDA )
*
*           Form the matrix T
*
            CALL ZLARFT( 'Forward', 'Rowwise', PN, PK,
     $                   A( I, I+KD ), LDA, TAU( I ),
     $                   WORK( TPOS ), LDT )
*
*           Compute W, with V = A(i:i+pk-1,i+kd:n) and
*           A22 = A(i+kd:n,i+kd:n):
*              S2 = T**H * V
*              W  = S2 * A22
*              S1 = W * S2**H
*              W  = W - 1/2 * S1 * V
*
            CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
     $                  ONE,  WORK( TPOS ), LDT,
     $                        A( I, I+KD ), LDA,
     $                  ZERO, WORK( S2POS ), LDS2 )
*
            CALL ZHEMM( 'Right', UPLO, PK, PN,
     $                  ONE,  A( I+KD, I+KD ), LDA,
     $                        WORK( S2POS ), LDS2,
     $                  ZERO, WORK( WPOS ), LDW )
*
            CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
     $                  ONE,  WORK( WPOS ), LDW,
     $                        WORK( S2POS ), LDS2,
     $                  ZERO, WORK( S1POS ), LDS1 )
*
            CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK,
     $                  -HALF, WORK( S1POS ), LDS1,
     $                         A( I, I+KD ), LDA,
     $                  ONE,   WORK( WPOS ), LDW )
*
*           Update the unreduced submatrix A(i+kd:n,i+kd:n), using
*           an update of the form:  A := A - V'*W - W'*V
*
            CALL ZHER2K( UPLO, 'Conjugate', PN, PK,
     $                   -ONE, A( I, I+KD ), LDA,
     $                         WORK( WPOS ), LDW,
     $                   RONE, A( I+KD, I+KD ), LDA )
   10    CONTINUE
*
*        Copy the upper band to AB which is the band storage matrix
*
         DO 30 J = N-KD+1, N
            LK = MIN( KD, N-J ) + 1
            CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
   30    CONTINUE
*
      ELSE
*
*        Reduce the lower triangle of A to lower band matrix
*
         DO 40 I = 1, N - KD, KD
            PN = N-I-KD+1
            PK = MIN( N-I-KD+1, KD )
*
*           Compute the QR factorization of the current block
*
            CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA,
     $                   TAU( I ), WORK( S2POS ), LS2, IINFO )
*
*           Copy the lower portion of A into AB.  This has to happen
*           before ZLASET below overwrites the triangular factor.
*
            DO 50 J = I, I+PK-1
               LK = MIN( KD, N-J ) + 1
               CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
   50       CONTINUE
*
*           Make the leading triangle of V explicit
*
            CALL ZLASET( 'Upper', PK, PK, ZERO, ONE,
     $                   A( I+KD, I ), LDA )
*
*           Form the matrix T
*
            CALL ZLARFT( 'Forward', 'Columnwise', PN, PK,
     $                   A( I+KD, I ), LDA, TAU( I ),
     $                   WORK( TPOS ), LDT )
*
*           Compute W, with V = A(i+kd:n,i:i+pk-1) and
*           A22 = A(i+kd:n,i+kd:n):
*              S2 = V * T
*              W  = A22 * S2
*              S1 = S2**H * W
*              W  = W - 1/2 * V * S1
*
            CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
     $                  ONE,  A( I+KD, I ), LDA,
     $                        WORK( TPOS ), LDT,
     $                  ZERO, WORK( S2POS ), LDS2 )
*
            CALL ZHEMM( 'Left', UPLO, PN, PK,
     $                  ONE,  A( I+KD, I+KD ), LDA,
     $                        WORK( S2POS ), LDS2,
     $                  ZERO, WORK( WPOS ), LDW )
*
            CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
     $                  ONE,  WORK( S2POS ), LDS2,
     $                        WORK( WPOS ), LDW,
     $                  ZERO, WORK( S1POS ), LDS1 )
*
            CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
     $                  -HALF, A( I+KD, I ), LDA,
     $                         WORK( S1POS ), LDS1,
     $                  ONE,   WORK( WPOS ), LDW )
*
*           Update the unreduced submatrix A(i+kd:n,i+kd:n), using
*           an update of the form:  A := A - V*W' - W*V'
*
            CALL ZHER2K( UPLO, 'No transpose', PN, PK,
     $                   -ONE, A( I+KD, I ), LDA,
     $                         WORK( WPOS ), LDW,
     $                   RONE, A( I+KD, I+KD ), LDA )
   40    CONTINUE
*
*        Copy the lower band to AB which is the band storage matrix
*
         DO 60 J = N-KD+1, N
            LK = MIN( KD, N-J ) + 1
            CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
   60    CONTINUE
*
      END IF
*
      WORK( 1 ) = LWMIN
      RETURN
*
*     End of ZHETRD_HE2HB
*
      END

xerbla
subroutine xerbla(srname, info)
Definition cblat2.f:3285

zcopy
subroutine zcopy(n, zx, incx, zy, incy)
ZCOPY
Definition zcopy.f:81

zgelqf
subroutine zgelqf(m, n, a, lda, tau, work, lwork, info)
ZGELQF
Definition zgelqf.f:142

zgemm
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:188

zgeqrf
subroutine zgeqrf(m, n, a, lda, tau, work, lwork, info)
ZGEQRF
Definition zgeqrf.f:144

zhemm
subroutine zhemm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)
ZHEMM
Definition zhemm.f:191

zher2k
subroutine zher2k(uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZHER2K
Definition zher2k.f:198

zhetrd_he2hb
subroutine zhetrd_he2hb(uplo, n, kd, a, lda, ab, ldab, tau, work, lwork, info)
ZHETRD_HE2HB
Definition zhetrd_he2hb.f:243

zlarft
recursive subroutine zlarft(direct, storev, n, k, v, ldv, tau, t, ldt)
ZLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition zlarft.f:162

zlaset
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:104