LAPACK 3.11.0
LAPACK: Linear Algebra PACKage
Loading...
Searching...
No Matches

◆ zhetrf_aa()

subroutine zhetrf_aa ( character  UPLO,
integer  N,
complex*16, dimension( lda, * )  A,
integer  LDA,
integer, dimension( * )  IPIV,
complex*16, dimension( * )  WORK,
integer  LWORK,
integer  INFO 
)

ZHETRF_AA

Download ZHETRF_AA + dependencies [TGZ] [ZIP] [TXT]

Purpose:
 ZHETRF_AA computes the factorization of a complex hermitian matrix A
 using the Aasen's algorithm.  The form of the factorization is

    A = U**H*T*U  or  A = L*T*L**H

 where U (or L) is a product of permutation and unit upper (lower)
 triangular matrices, and T is a hermitian tridiagonal matrix.

 This is the blocked version of the algorithm, calling Level 3 BLAS.
Parameters
[in]UPLO
          UPLO is CHARACTER*1
          = 'U':  Upper triangle of A is stored;
          = 'L':  Lower triangle of A is stored.
[in]N
          N is INTEGER
          The order of the matrix A.  N >= 0.
[in,out]A
          A is COMPLEX*16 array, dimension (LDA,N)
          On entry, the hermitian matrix A.  If UPLO = 'U', the leading
          N-by-N upper triangular part of A contains the upper
          triangular part of the matrix A, and the strictly lower
          triangular part of A is not referenced.  If UPLO = 'L', the
          leading N-by-N lower triangular part of A contains the lower
          triangular part of the matrix A, and the strictly upper
          triangular part of A is not referenced.

          On exit, the tridiagonal matrix is stored in the diagonals
          and the subdiagonals of A just below (or above) the diagonals,
          and L is stored below (or above) the subdiaonals, when UPLO
          is 'L' (or 'U').
[in]LDA
          LDA is INTEGER
          The leading dimension of the array A.  LDA >= max(1,N).
[out]IPIV
          IPIV is INTEGER array, dimension (N)
          On exit, it contains the details of the interchanges, i.e.,
          the row and column k of A were interchanged with the
          row and column IPIV(k).
[out]WORK
          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
[in]LWORK
          LWORK is INTEGER
          The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
          LWORK >= N*(1+NB), where NB is the optimal blocksize.

          If LWORK = -1, then a workspace query is assumed; the routine
          only calculates the optimal size of the WORK array, returns
          this value as the first entry of the WORK array, and no error
          message related to LWORK is issued by XERBLA.
[out]INFO
          INFO is INTEGER
          = 0:  successful exit
          < 0:  if INFO = -i, the i-th argument had an illegal value.
Author
Univ. of Tennessee
Univ. of California Berkeley
Univ. of Colorado Denver
NAG Ltd.

Definition at line 131 of file zhetrf_aa.f.

132*
133* -- LAPACK computational routine --
134* -- LAPACK is a software package provided by Univ. of Tennessee, --
135* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
136*
137 IMPLICIT NONE
138*
139* .. Scalar Arguments ..
140 CHARACTER UPLO
141 INTEGER N, LDA, LWORK, INFO
142* ..
143* .. Array Arguments ..
144 INTEGER IPIV( * )
145 COMPLEX*16 A( LDA, * ), WORK( * )
146* ..
147*
148* =====================================================================
149* .. Parameters ..
150 COMPLEX*16 ZERO, ONE
151 parameter( zero = (0.0d+0, 0.0d+0), one = (1.0d+0, 0.0d+0) )
152*
153* .. Local Scalars ..
154 LOGICAL LQUERY, UPPER
155 INTEGER J, LWKOPT
156 INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
157 COMPLEX*16 ALPHA
158* ..
159* .. External Functions ..
160 LOGICAL LSAME
161 INTEGER ILAENV
162 EXTERNAL lsame, ilaenv
163* ..
164* .. External Subroutines ..
166* ..
167* .. Intrinsic Functions ..
168 INTRINSIC dble, dconjg, max
169* ..
170* .. Executable Statements ..
171*
172* Determine the block size
173*
174 nb = ilaenv( 1, 'ZHETRF_AA', uplo, n, -1, -1, -1 )
175*
176* Test the input parameters.
177*
178 info = 0
179 upper = lsame( uplo, 'U' )
180 lquery = ( lwork.EQ.-1 )
181 IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
182 info = -1
183 ELSE IF( n.LT.0 ) THEN
184 info = -2
185 ELSE IF( lda.LT.max( 1, n ) ) THEN
186 info = -4
187 ELSE IF( lwork.LT.max( 1, 2*n ) .AND. .NOT.lquery ) THEN
188 info = -7
189 END IF
190*
191 IF( info.EQ.0 ) THEN
192 lwkopt = (nb+1)*n
193 work( 1 ) = lwkopt
194 END IF
195*
196 IF( info.NE.0 ) THEN
197 CALL xerbla( 'ZHETRF_AA', -info )
198 RETURN
199 ELSE IF( lquery ) THEN
200 RETURN
201 END IF
202*
203* Quick return
204*
205 IF ( n.EQ.0 ) THEN
206 RETURN
207 ENDIF
208 ipiv( 1 ) = 1
209 IF ( n.EQ.1 ) THEN
210 a( 1, 1 ) = dble( a( 1, 1 ) )
211 RETURN
212 END IF
213*
214* Adjust block size based on the workspace size
215*
216 IF( lwork.LT.((1+nb)*n) ) THEN
217 nb = ( lwork-n ) / n
218 END IF
219*
220 IF( upper ) THEN
221*
222* .....................................................
223* Factorize A as U**H*D*U using the upper triangle of A
224* .....................................................
225*
226* copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
227*
228 CALL zcopy( n, a( 1, 1 ), lda, work( 1 ), 1 )
229*
230* J is the main loop index, increasing from 1 to N in steps of
231* JB, where JB is the number of columns factorized by ZLAHEF;
232* JB is either NB, or N-J+1 for the last block
233*
234 j = 0
235 10 CONTINUE
236 IF( j.GE.n )
237 $ GO TO 20
238*
239* each step of the main loop
240* J is the last column of the previous panel
241* J1 is the first column of the current panel
242* K1 identifies if the previous column of the panel has been
243* explicitly stored, e.g., K1=1 for the first panel, and
244* K1=0 for the rest
245*
246 j1 = j + 1
247 jb = min( n-j1+1, nb )
248 k1 = max(1, j)-j
249*
250* Panel factorization
251*
252 CALL zlahef_aa( uplo, 2-k1, n-j, jb,
253 $ a( max(1, j), j+1 ), lda,
254 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
255*
256* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
257*
258 DO j2 = j+2, min(n, j+jb+1)
259 ipiv( j2 ) = ipiv( j2 ) + j
260 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
261 CALL zswap( j1-k1-2, a( 1, j2 ), 1,
262 $ a( 1, ipiv(j2) ), 1 )
263 END IF
264 END DO
265 j = j + jb
266*
267* Trailing submatrix update, where
268* the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
269* WORK stores the current block of the auxiriarly matrix H
270*
271 IF( j.LT.n ) THEN
272*
273* if the first panel and JB=1 (NB=1), then nothing to do
274*
275 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
276*
277* Merge rank-1 update with BLAS-3 update
278*
279 alpha = dconjg( a( j, j+1 ) )
280 a( j, j+1 ) = one
281 CALL zcopy( n-j, a( j-1, j+1 ), lda,
282 $ work( (j+1-j1+1)+jb*n ), 1 )
283 CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
284*
285* K1 identifies if the previous column of the panel has been
286* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
287* and K1=1 and K2=0 for the rest
288*
289 IF( j1.GT.1 ) THEN
290*
291* Not first panel
292*
293 k2 = 1
294 ELSE
295*
296* First panel
297*
298 k2 = 0
299*
300* First update skips the first column
301*
302 jb = jb - 1
303 END IF
304*
305 DO j2 = j+1, n, nb
306 nj = min( nb, n-j2+1 )
307*
308* Update (J2, J2) diagonal block with ZGEMV
309*
310 j3 = j2
311 DO mj = nj-1, 1, -1
312 CALL zgemm( 'Conjugate transpose', 'Transpose',
313 $ 1, mj, jb+1,
314 $ -one, a( j1-k2, j3 ), lda,
315 $ work( (j3-j1+1)+k1*n ), n,
316 $ one, a( j3, j3 ), lda )
317 j3 = j3 + 1
318 END DO
319*
320* Update off-diagonal block of J2-th block row with ZGEMM
321*
322 CALL zgemm( 'Conjugate transpose', 'Transpose',
323 $ nj, n-j3+1, jb+1,
324 $ -one, a( j1-k2, j2 ), lda,
325 $ work( (j3-j1+1)+k1*n ), n,
326 $ one, a( j2, j3 ), lda )
327 END DO
328*
329* Recover T( J, J+1 )
330*
331 a( j, j+1 ) = dconjg( alpha )
332 END IF
333*
334* WORK(J+1, 1) stores H(J+1, 1)
335*
336 CALL zcopy( n-j, a( j+1, j+1 ), lda, work( 1 ), 1 )
337 END IF
338 GO TO 10
339 ELSE
340*
341* .....................................................
342* Factorize A as L*D*L**H using the lower triangle of A
343* .....................................................
344*
345* copy first column A(1:N, 1) into H(1:N, 1)
346* (stored in WORK(1:N))
347*
348 CALL zcopy( n, a( 1, 1 ), 1, work( 1 ), 1 )
349*
350* J is the main loop index, increasing from 1 to N in steps of
351* JB, where JB is the number of columns factorized by ZLAHEF;
352* JB is either NB, or N-J+1 for the last block
353*
354 j = 0
355 11 CONTINUE
356 IF( j.GE.n )
357 $ GO TO 20
358*
359* each step of the main loop
360* J is the last column of the previous panel
361* J1 is the first column of the current panel
362* K1 identifies if the previous column of the panel has been
363* explicitly stored, e.g., K1=1 for the first panel, and
364* K1=0 for the rest
365*
366 j1 = j+1
367 jb = min( n-j1+1, nb )
368 k1 = max(1, j)-j
369*
370* Panel factorization
371*
372 CALL zlahef_aa( uplo, 2-k1, n-j, jb,
373 $ a( j+1, max(1, j) ), lda,
374 $ ipiv( j+1 ), work, n, work( n*nb+1 ) )
375*
376* Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
377*
378 DO j2 = j+2, min(n, j+jb+1)
379 ipiv( j2 ) = ipiv( j2 ) + j
380 IF( (j2.NE.ipiv(j2)) .AND. ((j1-k1).GT.2) ) THEN
381 CALL zswap( j1-k1-2, a( j2, 1 ), lda,
382 $ a( ipiv(j2), 1 ), lda )
383 END IF
384 END DO
385 j = j + jb
386*
387* Trailing submatrix update, where
388* A(J2+1, J1-1) stores L(J2+1, J1) and
389* WORK(J2+1, 1) stores H(J2+1, 1)
390*
391 IF( j.LT.n ) THEN
392*
393* if the first panel and JB=1 (NB=1), then nothing to do
394*
395 IF( j1.GT.1 .OR. jb.GT.1 ) THEN
396*
397* Merge rank-1 update with BLAS-3 update
398*
399 alpha = dconjg( a( j+1, j ) )
400 a( j+1, j ) = one
401 CALL zcopy( n-j, a( j+1, j-1 ), 1,
402 $ work( (j+1-j1+1)+jb*n ), 1 )
403 CALL zscal( n-j, alpha, work( (j+1-j1+1)+jb*n ), 1 )
404*
405* K1 identifies if the previous column of the panel has been
406* explicitly stored, e.g., K1=0 and K2=1 for the first panel,
407* and K1=1 and K2=0 for the rest
408*
409 IF( j1.GT.1 ) THEN
410*
411* Not first panel
412*
413 k2 = 1
414 ELSE
415*
416* First panel
417*
418 k2 = 0
419*
420* First update skips the first column
421*
422 jb = jb - 1
423 END IF
424*
425 DO j2 = j+1, n, nb
426 nj = min( nb, n-j2+1 )
427*
428* Update (J2, J2) diagonal block with ZGEMV
429*
430 j3 = j2
431 DO mj = nj-1, 1, -1
432 CALL zgemm( 'No transpose', 'Conjugate transpose',
433 $ mj, 1, jb+1,
434 $ -one, work( (j3-j1+1)+k1*n ), n,
435 $ a( j3, j1-k2 ), lda,
436 $ one, a( j3, j3 ), lda )
437 j3 = j3 + 1
438 END DO
439*
440* Update off-diagonal block of J2-th block column with ZGEMM
441*
442 CALL zgemm( 'No transpose', 'Conjugate transpose',
443 $ n-j3+1, nj, jb+1,
444 $ -one, work( (j3-j1+1)+k1*n ), n,
445 $ a( j2, j1-k2 ), lda,
446 $ one, a( j3, j2 ), lda )
447 END DO
448*
449* Recover T( J+1, J )
450*
451 a( j+1, j ) = dconjg( alpha )
452 END IF
453*
454* WORK(J+1, 1) stores H(J+1, 1)
455*
456 CALL zcopy( n-j, a( j+1, j+1 ), 1, work( 1 ), 1 )
457 END IF
458 GO TO 11
459 END IF
460*
461 20 CONTINUE
462 work( 1 ) = lwkopt
463 RETURN
464*
465* End of ZHETRF_AA
466*
integer function ilaenv(ISPEC, NAME, OPTS, N1, N2, N3, N4)
ILAENV
Definition: ilaenv.f:162
subroutine xerbla(SRNAME, INFO)
XERBLA
Definition: xerbla.f:60
logical function lsame(CA, CB)
LSAME
Definition: lsame.f:53
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
Definition: zswap.f:81
subroutine zscal(N, ZA, ZX, INCX)
ZSCAL
Definition: zscal.f:78
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
Definition: zcopy.f:81
subroutine zgemv(TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY)
ZGEMV
Definition: zgemv.f:158
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
Definition: zgemm.f:187
subroutine zlahef_aa(UPLO, J1, M, NB, A, LDA, IPIV, H, LDH, WORK)
ZLAHEF_AA
Definition: zlahef_aa.f:144
Here is the call graph for this function:
Here is the caller graph for this function: