LAPACK 3.12.1
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ zhetrd_he2hb()

subroutine zhetrd_he2hb ( character uplo,
integer n,
integer kd,
complex*16, dimension( lda, * ) a,
integer lda,
complex*16, dimension( ldab, * ) ab,
integer ldab,
complex*16, dimension( * ) tau,
complex*16, dimension( * ) work,
integer lwork,
integer info )

ZHETRD_HE2HB

Download ZHETRD_HE2HB + dependencies [TGZ] [ZIP] [TXT]

Purpose:
!>
!> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
!> band-diagonal form AB by a unitary similarity transformation:
!> Q**H * A * Q = AB.
!> 
Parameters
[in]UPLO
!>          UPLO is CHARACTER*1
!>          = 'U':  Upper triangle of A is stored;
!>          = 'L':  Lower triangle of A is stored.
!> 
[in]N
!>          N is INTEGER
!>          The order of the matrix A.  N >= 0.
!> 
[in]KD
!>          KD is INTEGER
!>          The number of superdiagonals of the reduced matrix if UPLO = 'U',
!>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
!>          The reduced matrix is stored in the array AB.
!> 
[in,out]A
!>          A is COMPLEX*16 array, dimension (LDA,N)
!>          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
!>          N-by-N upper triangular part of A contains the upper
!>          triangular part of the matrix A, and the strictly lower
!>          triangular part of A is not referenced.  If UPLO = 'L', the
!>          leading N-by-N lower triangular part of A contains the lower
!>          triangular part of the matrix A, and the strictly upper
!>          triangular part of A is not referenced.
!>          On exit, if UPLO = 'U', the diagonal and first superdiagonal
!>          of A are overwritten by the corresponding elements of the
!>          tridiagonal matrix T, and the elements above the first
!>          superdiagonal, with the array TAU, represent the unitary
!>          matrix Q as a product of elementary reflectors; if UPLO
!>          = 'L', the diagonal and first subdiagonal of A are over-
!>          written by the corresponding elements of the tridiagonal
!>          matrix T, and the elements below the first subdiagonal, with
!>          the array TAU, represent the unitary matrix Q as a product
!>          of elementary reflectors. See Further Details.
!> 
[in]LDA
!>          LDA is INTEGER
!>          The leading dimension of the array A.  LDA >= max(1,N).
!> 
[out]AB
!>          AB is COMPLEX*16 array, dimension (LDAB,N)
!>          On exit, the upper or lower triangle of the Hermitian band
!>          matrix A, stored in the first KD+1 rows of the array.  The
!>          j-th column of A is stored in the j-th column of the array AB
!>          as follows:
!>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
!>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
!> 
[in]LDAB
!>          LDAB is INTEGER
!>          The leading dimension of the array AB.  LDAB >= KD+1.
!> 
[out]TAU
!>          TAU is COMPLEX*16 array, dimension (N-KD)
!>          The scalar factors of the elementary reflectors (see Further
!>          Details).
!> 
[out]WORK
!>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
!>          On exit, if INFO = 0, or if LWORK = -1,
!>          WORK(1) returns the size of LWORK.
!> 
[in]LWORK
!>          LWORK is INTEGER
!>          The dimension of the array WORK which should be calculated
!>          by a workspace query.
!>          If N <= KD+1, LWORK >= 1, else LWORK = MAX(1, LWORK_QUERY).
!>
!>          If LWORK = -1, then a workspace query is assumed; the routine
!>          only calculates the optimal size of the WORK array, returns
!>          this value as the first entry of the WORK array, and no error
!>          message related to LWORK is issued by XERBLA.
!>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
!>          where FACTOPTNB is the blocking used by the QR or LQ
!>          algorithm, usually FACTOPTNB=128 is a good choice otherwise
!>          putting LWORK=-1 will provide the size of WORK.
!> 
[out]INFO
!>          INFO is INTEGER
!>          = 0:  successful exit
!>          < 0:  if INFO = -i, the i-th argument had an illegal value
!> 
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.
Further Details:
!>
!>  Implemented by Azzam Haidar.
!>
!>  All details are available on technical report, SC11, SC13 papers.
!>
!>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
!>  Parallel reduction to condensed forms for symmetric eigenvalue problems
!>  using aggregated fine-grained and memory-aware kernels. In Proceedings
!>  of 2011 International Conference for High Performance Computing,
!>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
!>  Article 8 , 11 pages.
!>  http://doi.acm.org/10.1145/2063384.2063394
!>
!>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
!>  An improved parallel singular value algorithm and its implementation 
!>  for multicore hardware, In Proceedings of 2013 International Conference
!>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
!>  Denver, Colorado, USA, 2013.
!>  Article 90, 12 pages.
!>  http://doi.acm.org/10.1145/2503210.2503292
!>
!>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
!>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
!>  calculations based on fine-grained memory aware tasks.
!>  International Journal of High Performance Computing Applications.
!>  Volume 28 Issue 2, Pages 196-209, May 2014.
!>  http://hpc.sagepub.com/content/28/2/196 
!>
!> 
!>
!>  If UPLO = 'U', the matrix Q is represented as a product of elementary
!>  reflectors
!>
!>     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
!>
!>  Each H(i) has the form
!>
!>     H(i) = I - tau * v * v**H
!>
!>  where tau is a complex scalar, and v is a complex vector with
!>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
!>  A(i,i+kd+1:n), and tau in TAU(i).
!>
!>  If UPLO = 'L', the matrix Q is represented as a product of elementary
!>  reflectors
!>
!>     Q = H(1) H(2) . . . H(k), where k = n-kd.
!>
!>  Each H(i) has the form
!>
!>     H(i) = I - tau * v * v**H
!>
!>  where tau is a complex scalar, and v is a complex vector with
!>  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
!>  A(i+kd+2:n,i), and tau in TAU(i).
!>
!>  The contents of A on exit are illustrated by the following examples
!>  with n = 5:
!>
!>  if UPLO = 'U':                       if UPLO = 'L':
!>
!>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )
!>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )
!>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )
!>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )
!>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )
!>
!>  where d and e denote diagonal and off-diagonal elements of T, and vi
!>  denotes an element of the vector defining H(i).
!> 

Definition at line 241 of file zhetrd_he2hb.f.

243*
244 IMPLICIT NONE
245*
246* -- LAPACK computational routine --
247* -- LAPACK is a software package provided by Univ. of Tennessee, --
248* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
249*
250* .. Scalar Arguments ..
251 CHARACTER UPLO
252 INTEGER INFO, LDA, LDAB, LWORK, N, KD
253* ..
254* .. Array Arguments ..
255 COMPLEX*16 A( LDA, * ), AB( LDAB, * ),
256 $ TAU( * ), WORK( * )
257* ..
258*
259* =====================================================================
260*
261* .. Parameters ..
262 DOUBLE PRECISION RONE
263 COMPLEX*16 ZERO, ONE, HALF
264 parameter( rone = 1.0d+0,
265 $ zero = ( 0.0d+0, 0.0d+0 ),
266 $ one = ( 1.0d+0, 0.0d+0 ),
267 $ half = ( 0.5d+0, 0.0d+0 ) )
268* ..
269* .. Local Scalars ..
270 LOGICAL LQUERY, UPPER
271 INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
272 $ LDT, LDW, LDS2, LDS1,
273 $ LS2, LS1, LW, LT,
274 $ TPOS, WPOS, S2POS, S1POS
275* ..
276* .. External Subroutines ..
277 EXTERNAL xerbla, zher2k, zhemm, zgemm,
278 $ zcopy,
280* ..
281* .. Intrinsic Functions ..
282 INTRINSIC min, max
283* ..
284* .. External Functions ..
285 LOGICAL LSAME
286 INTEGER ILAENV2STAGE
287 EXTERNAL lsame, ilaenv2stage
288* ..
289* .. Executable Statements ..
290*
291* Determine the minimal workspace size required
292* and test the input parameters
293*
294 info = 0
295 upper = lsame( uplo, 'U' )
296 lquery = ( lwork.EQ.-1 )
297 IF( n.LE.kd+1 ) THEN
298 lwmin = 1
299 ELSE
300 lwmin = ilaenv2stage( 4, 'ZHETRD_HE2HB', '', n, kd, -1, -1 )
301 END IF
302*
303 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
304 info = -1
305 ELSE IF( n.LT.0 ) THEN
306 info = -2
307 ELSE IF( kd.LT.0 ) THEN
308 info = -3
309 ELSE IF( lda.LT.max( 1, n ) ) THEN
310 info = -5
311 ELSE IF( ldab.LT.max( 1, kd+1 ) ) THEN
312 info = -7
313 ELSE IF( lwork.LT.lwmin .AND. .NOT.lquery ) THEN
314 info = -10
315 END IF
316*
317 IF( info.NE.0 ) THEN
318 CALL xerbla( 'ZHETRD_HE2HB', -info )
319 RETURN
320 ELSE IF( lquery ) THEN
321 work( 1 ) = lwmin
322 RETURN
323 END IF
324*
325* Quick return if possible
326* Copy the upper/lower portion of A into AB
327*
328 IF( n.LE.kd+1 ) THEN
329 IF( upper ) THEN
330 DO 100 i = 1, n
331 lk = min( kd+1, i )
332 CALL zcopy( lk, a( i-lk+1, i ), 1,
333 $ ab( kd+1-lk+1, i ), 1 )
334 100 CONTINUE
335 ELSE
336 DO 110 i = 1, n
337 lk = min( kd+1, n-i+1 )
338 CALL zcopy( lk, a( i, i ), 1, ab( 1, i ), 1 )
339 110 CONTINUE
340 ENDIF
341 work( 1 ) = 1
342 RETURN
343 END IF
344*
345* Determine the pointer position for the workspace
346*
347 ldt = kd
348 lds1 = kd
349 lt = ldt*kd
350 lw = n*kd
351 ls1 = lds1*kd
352 ls2 = lwmin - lt - lw - ls1
353* LS2 = N*MAX(KD,FACTOPTNB)
354 tpos = 1
355 wpos = tpos + lt
356 s1pos = wpos + lw
357 s2pos = s1pos + ls1
358 IF( upper ) THEN
359 ldw = kd
360 lds2 = kd
361 ELSE
362 ldw = n
363 lds2 = n
364 ENDIF
365*
366*
367* Set the workspace of the triangular matrix T to zero once such a
368* way every time T is generated the upper/lower portion will be always zero
369*
370 CALL zlaset( "A", ldt, kd, zero, zero, work( tpos ), ldt )
371*
372 IF( upper ) THEN
373 DO 10 i = 1, n - kd, kd
374 pn = n-i-kd+1
375 pk = min( n-i-kd+1, kd )
376*
377* Compute the LQ factorization of the current block
378*
379 CALL zgelqf( kd, pn, a( i, i+kd ), lda,
380 $ tau( i ), work( s2pos ), ls2, iinfo )
381*
382* Copy the upper portion of A into AB
383*
384 DO 20 j = i, i+pk-1
385 lk = min( kd, n-j ) + 1
386 CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ),
387 $ ldab-1 )
388 20 CONTINUE
389*
390 CALL zlaset( 'Lower', pk, pk, zero, one,
391 $ a( i, i+kd ), lda )
392*
393* Form the matrix T
394*
395 CALL zlarft( 'Forward', 'Rowwise', pn, pk,
396 $ a( i, i+kd ), lda, tau( i ),
397 $ work( tpos ), ldt )
398*
399* Compute W:
400*
401 CALL zgemm( 'Conjugate', 'No transpose', pk, pn, pk,
402 $ one, work( tpos ), ldt,
403 $ a( i, i+kd ), lda,
404 $ zero, work( s2pos ), lds2 )
405*
406 CALL zhemm( 'Right', uplo, pk, pn,
407 $ one, a( i+kd, i+kd ), lda,
408 $ work( s2pos ), lds2,
409 $ zero, work( wpos ), ldw )
410*
411 CALL zgemm( 'No transpose', 'Conjugate', pk, pk, pn,
412 $ one, work( wpos ), ldw,
413 $ work( s2pos ), lds2,
414 $ zero, work( s1pos ), lds1 )
415*
416 CALL zgemm( 'No transpose', 'No transpose', pk, pn, pk,
417 $ -half, work( s1pos ), lds1,
418 $ a( i, i+kd ), lda,
419 $ one, work( wpos ), ldw )
420*
421*
422* Update the unreduced submatrix A(i+kd:n,i+kd:n), using
423* an update of the form: A := A - V'*W - W'*V
424*
425 CALL zher2k( uplo, 'Conjugate', pn, pk,
426 $ -one, a( i, i+kd ), lda,
427 $ work( wpos ), ldw,
428 $ rone, a( i+kd, i+kd ), lda )
429 10 CONTINUE
430*
431* Copy the upper band to AB which is the band storage matrix
432*
433 DO 30 j = n-kd+1, n
434 lk = min(kd, n-j) + 1
435 CALL zcopy( lk, a( j, j ), lda, ab( kd+1, j ), ldab-1 )
436 30 CONTINUE
437*
438 ELSE
439*
440* Reduce the lower triangle of A to lower band matrix
441*
442 DO 40 i = 1, n - kd, kd
443 pn = n-i-kd+1
444 pk = min( n-i-kd+1, kd )
445*
446* Compute the QR factorization of the current block
447*
448 CALL zgeqrf( pn, kd, a( i+kd, i ), lda,
449 $ tau( i ), work( s2pos ), ls2, iinfo )
450*
451* Copy the upper portion of A into AB
452*
453 DO 50 j = i, i+pk-1
454 lk = min( kd, n-j ) + 1
455 CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
456 50 CONTINUE
457*
458 CALL zlaset( 'Upper', pk, pk, zero, one,
459 $ a( i+kd, i ), lda )
460*
461* Form the matrix T
462*
463 CALL zlarft( 'Forward', 'Columnwise', pn, pk,
464 $ a( i+kd, i ), lda, tau( i ),
465 $ work( tpos ), ldt )
466*
467* Compute W:
468*
469 CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
470 $ one, a( i+kd, i ), lda,
471 $ work( tpos ), ldt,
472 $ zero, work( s2pos ), lds2 )
473*
474 CALL zhemm( 'Left', uplo, pn, pk,
475 $ one, a( i+kd, i+kd ), lda,
476 $ work( s2pos ), lds2,
477 $ zero, work( wpos ), ldw )
478*
479 CALL zgemm( 'Conjugate', 'No transpose', pk, pk, pn,
480 $ one, work( s2pos ), lds2,
481 $ work( wpos ), ldw,
482 $ zero, work( s1pos ), lds1 )
483*
484 CALL zgemm( 'No transpose', 'No transpose', pn, pk, pk,
485 $ -half, a( i+kd, i ), lda,
486 $ work( s1pos ), lds1,
487 $ one, work( wpos ), ldw )
488*
489*
490* Update the unreduced submatrix A(i+kd:n,i+kd:n), using
491* an update of the form: A := A - V*W' - W*V'
492*
493 CALL zher2k( uplo, 'No transpose', pn, pk,
494 $ -one, a( i+kd, i ), lda,
495 $ work( wpos ), ldw,
496 $ rone, a( i+kd, i+kd ), lda )
497* ==================================================================
498* RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
499* DO 45 J = I, I+PK-1
500* LK = MIN( KD, N-J ) + 1
501* CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
502* 45 CONTINUE
503* ==================================================================
504 40 CONTINUE
505*
506* Copy the lower band to AB which is the band storage matrix
507*
508 DO 60 j = n-kd+1, n
509 lk = min(kd, n-j) + 1
510 CALL zcopy( lk, a( j, j ), 1, ab( 1, j ), 1 )
511 60 CONTINUE
512
513 END IF
514*
515 work( 1 ) = lwmin
516 RETURN
517*
518* End of ZHETRD_HE2HB
519*
subroutine xerbla(srname, info)
Definition cblat2.f:3285
subroutine zcopy(n, zx, incx, zy, incy)
ZCOPY
Definition zcopy.f:81
subroutine zgelqf(m, n, a, lda, tau, work, lwork, info)
ZGELQF
Definition zgelqf.f:142
subroutine zgemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZGEMM
Definition zgemm.f:188
subroutine zgeqrf(m, n, a, lda, tau, work, lwork, info)
ZGEQRF
Definition zgeqrf.f:144
subroutine zhemm(side, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc)
ZHEMM
Definition zhemm.f:191
subroutine zher2k(uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
ZHER2K
Definition zher2k.f:198
integer function ilaenv2stage(ispec, name, opts, n1, n2, n3, n4)
ILAENV2STAGE
recursive subroutine zlarft(direct, storev, n, k, v, ldv, tau, t, ldt)
ZLARFT forms the triangular factor T of a block reflector H = I - vtvH
Definition zlarft.f:162
subroutine zlaset(uplo, m, n, alpha, beta, a, lda)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
Definition zlaset.f:104
logical function lsame(ca, cb)
LSAME
Definition lsame.f:48
Here is the call graph for this function:
Here is the caller graph for this function: