159 $ IPIV2, WORK, LWORK, INFO )
169 INTEGER N, LDA, LTB, LWORK, INFO
172 INTEGER IPIV( * ), IPIV2( * )
173 COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
178 COMPLEX*16 CZERO, CONE
179 parameter( czero = ( 0.0d+0, 0.0d+0 ),
180 $ cone = ( 1.0d+0, 0.0d+0 ) )
183 LOGICAL UPPER, TQUERY, WQUERY
184 INTEGER I, J, K, I1, I2, TD
185 INTEGER LDTB, NB, KB, JB, NT, IINFO
191 EXTERNAL lsame, ilaenv
205 upper = lsame( uplo,
'U' )
206 wquery = ( lwork.EQ.-1 )
207 tquery = ( ltb.EQ.-1 )
208 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
210 ELSE IF( n.LT.0 )
THEN
212 ELSE IF( lda.LT.max( 1, n ) )
THEN
214 ELSE IF ( ltb .LT. 4*n .AND. .NOT.tquery )
THEN
216 ELSE IF ( lwork .LT. n .AND. .NOT.wquery )
THEN
221 CALL xerbla(
'ZSYTRF_AA_2STAGE', -info )
227 nb = ilaenv( 1,
'ZSYTRF_AA_2STAGE', uplo, n, -1, -1, -1 )
236 IF( tquery .OR. wquery )
THEN
249 IF( ldtb .LT. 3*nb+1 )
THEN
252 IF( lwork .LT. nb*n )
THEN
286 IF( i .EQ. (j-1) )
THEN
291 CALL zgemm(
'NoTranspose',
'NoTranspose',
293 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
294 $ a( (i-1)*nb+1, j*nb+1 ), lda,
295 $ czero, work( i*nb+1 ), n )
298 IF( i .EQ. (j-1) )
THEN
303 CALL zgemm(
'NoTranspose',
'NoTranspose',
305 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
307 $ a( (i-2)*nb+1, j*nb+1 ), lda,
308 $ czero, work( i*nb+1 ), n )
314 CALL zlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
315 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
318 CALL zgemm(
'Transpose',
'NoTranspose',
320 $ -cone, a( 1, j*nb+1 ), lda,
322 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
324 CALL zgemm(
'Transpose',
'NoTranspose',
326 $ cone, a( (j-1)*nb+1, j*nb+1 ), lda,
327 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
328 $ czero, work( 1 ), n )
329 CALL zgemm(
'NoTranspose',
'NoTranspose',
331 $ -cone, work( 1 ), n,
332 $ a( (j-2)*nb+1, j*nb+1 ), lda,
333 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
340 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
341 $ = tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
348 CALL ztrsm(
'L',
'U',
'T',
'N', kb, kb, cone,
349 $ a( (j-1)*nb+1, j*nb+1 ), lda,
350 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
351 CALL ztrsm(
'R',
'U',
'N',
'N', kb, kb, cone,
352 $ a( (j-1)*nb+1, j*nb+1 ), lda,
353 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
362 CALL zgemm(
'NoTranspose',
'NoTranspose',
364 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
365 $ a( (j-1)*nb+1, j*nb+1 ), lda,
366 $ czero, work( j*nb+1 ), n )
368 CALL zgemm(
'NoTranspose',
'NoTranspose',
370 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
372 $ a( (j-2)*nb+1, j*nb+1 ), lda,
373 $ czero, work( j*nb+1 ), n )
378 CALL zgemm(
'Transpose',
'NoTranspose',
379 $ nb, n-(j+1)*nb, j*nb,
380 $ -cone, work( nb+1 ), n,
381 $ a( 1, (j+1)*nb+1 ), lda,
382 $ cone, a( j*nb+1, (j+1)*nb+1 ), lda )
388 CALL zcopy( n-(j+1)*nb,
389 $ a( j*nb+k, (j+1)*nb+1 ), lda,
390 $ work( 1+(k-1)*n ), 1 )
395 CALL zgetrf( n-(j+1)*nb, nb,
397 $ ipiv( (j+1)*nb+1 ), iinfo )
405 CALL zcopy( n-(j+1)*nb,
406 $ work( 1+(k-1)*n ), 1,
407 $ a( j*nb+k, (j+1)*nb+1 ), lda )
412 kb = min(nb, n-(j+1)*nb)
413 CALL zlaset(
'Full', kb, nb, czero, czero,
414 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
415 CALL zlacpy(
'Upper', kb, nb,
417 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
419 CALL ztrsm(
'R',
'U',
'N',
'U', kb, nb, cone,
420 $ a( (j-1)*nb+1, j*nb+1 ), lda,
421 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
429 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
430 $ = tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
433 CALL zlaset(
'Lower', kb, nb, czero, cone,
434 $ a( j*nb+1, (j+1)*nb+1), lda )
440 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
443 i2 = ipiv( (j+1)*nb+k )
446 CALL zswap( k-1, a( (j+1)*nb+1, i1 ), 1,
447 $ a( (j+1)*nb+1, i2 ), 1 )
450 $
CALL zswap( i2-i1-1, a( i1, i1+1 ), lda,
454 $
CALL zswap( n-i2, a( i1, i2+1 ), lda,
455 $ a( i2, i2+1 ), lda )
458 a( i1, i1 ) = a( i2, i2 )
462 CALL zswap( j*nb, a( 1, i1 ), 1,
483 IF( i .EQ. (j-1) )
THEN
488 CALL zgemm(
'NoTranspose',
'Transpose',
490 $ cone, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
491 $ a( j*nb+1, (i-1)*nb+1 ), lda,
492 $ czero, work( i*nb+1 ), n )
495 IF( i .EQ. (j-1) )
THEN
500 CALL zgemm(
'NoTranspose',
'Transpose',
502 $ cone, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
504 $ a( j*nb+1, (i-2)*nb+1 ), lda,
505 $ czero, work( i*nb+1 ), n )
511 CALL zlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
512 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
515 CALL zgemm(
'NoTranspose',
'NoTranspose',
517 $ -cone, a( j*nb+1, 1 ), lda,
519 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
521 CALL zgemm(
'NoTranspose',
'NoTranspose',
523 $ cone, a( j*nb+1, (j-1)*nb+1 ), lda,
524 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
525 $ czero, work( 1 ), n )
526 CALL zgemm(
'NoTranspose',
'Transpose',
528 $ -cone, work( 1 ), n,
529 $ a( j*nb+1, (j-2)*nb+1 ), lda,
530 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
537 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
538 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
545 CALL ztrsm(
'L',
'L',
'N',
'N', kb, kb, cone,
546 $ a( j*nb+1, (j-1)*nb+1 ), lda,
547 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
548 CALL ztrsm(
'R',
'L',
'T',
'N', kb, kb, cone,
549 $ a( j*nb+1, (j-1)*nb+1 ), lda,
550 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
557 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
558 $ = tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
568 CALL zgemm(
'NoTranspose',
'Transpose',
570 $ cone, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
571 $ a( j*nb+1, (j-1)*nb+1 ), lda,
572 $ czero, work( j*nb+1 ), n )
574 CALL zgemm(
'NoTranspose',
'Transpose',
576 $ cone, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
578 $ a( j*nb+1, (j-2)*nb+1 ), lda,
579 $ czero, work( j*nb+1 ), n )
584 CALL zgemm(
'NoTranspose',
'NoTranspose',
585 $ n-(j+1)*nb, nb, j*nb,
586 $ -cone, a( (j+1)*nb+1, 1 ), lda,
588 $ cone, a( (j+1)*nb+1, j*nb+1 ), lda )
593 CALL zgetrf( n-(j+1)*nb, nb,
594 $ a( (j+1)*nb+1, j*nb+1 ), lda,
595 $ ipiv( (j+1)*nb+1 ), iinfo )
602 kb = min(nb, n-(j+1)*nb)
603 CALL zlaset(
'Full', kb, nb, czero, czero,
604 $ tb( td+nb+1 + (j*nb)*ldtb), ldtb-1 )
605 CALL zlacpy(
'Upper', kb, nb,
606 $ a( (j+1)*nb+1, j*nb+1 ), lda,
607 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
609 CALL ztrsm(
'R',
'L',
'T',
'U', kb, nb, cone,
610 $ a( j*nb+1, (j-1)*nb+1 ), lda,
611 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
619 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb ) =
620 $ tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb )
623 CALL zlaset(
'Upper', kb, nb, czero, cone,
624 $ a( (j+1)*nb+1, j*nb+1 ), lda )
630 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
633 i2 = ipiv( (j+1)*nb+k )
636 CALL zswap( k-1, a( i1, (j+1)*nb+1 ), lda,
637 $ a( i2, (j+1)*nb+1 ), lda )
640 $
CALL zswap( i2-i1-1, a( i1+1, i1 ), 1,
641 $ a( i2, i1+1 ), lda )
644 $
CALL zswap( n-i2, a( i2+1, i1 ), 1,
648 a( i1, i1 ) = a( i2, i2 )
652 CALL zswap( j*nb, a( i1, 1 ), lda,
667 CALL zgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )
subroutine xerbla(SRNAME, INFO)
XERBLA
subroutine zswap(N, ZX, INCX, ZY, INCY)
ZSWAP
subroutine zcopy(N, ZX, INCX, ZY, INCY)
ZCOPY
subroutine zgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
ZGEMM
subroutine ztrsm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
ZTRSM
subroutine zgbtrf(M, N, KL, KU, AB, LDAB, IPIV, INFO)
ZGBTRF
subroutine zlaswp(N, A, LDA, K1, K2, IPIV, INCX)
ZLASWP performs a series of row interchanges on a general rectangular matrix.
subroutine zlacpy(UPLO, M, N, A, LDA, B, LDB)
ZLACPY copies all or part of one two-dimensional array to another.
subroutine zlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values.
subroutine zsytrf_aa_2stage(UPLO, N, A, LDA, TB, LTB, IPIV, IPIV2, WORK, LWORK, INFO)
ZSYTRF_AA_2STAGE
subroutine zgetrf(M, N, A, LDA, IPIV, INFO)
ZGETRF VARIANT: Crout Level 3 BLAS version of the algorithm.