157 $ IPIV2, WORK, LWORK, INFO )
167 INTEGER N, LDA, LTB, LWORK, INFO
170 INTEGER IPIV( * ), IPIV2( * )
171 COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
177 parameter( zero = ( 0.0e+0, 0.0e+0 ),
178 $ one = ( 1.0e+0, 0.0e+0 ) )
181 LOGICAL UPPER, TQUERY, WQUERY
182 INTEGER I, J, K, I1, I2, TD
183 INTEGER LWKOPT, LDTB, NB, KB, JB, NT, IINFO
189 EXTERNAL lsame, ilaenv
197 INTRINSIC dconjg, min, max
204 upper = lsame( uplo,
'U' )
205 wquery = ( lwork.EQ.-1 )
206 tquery = ( ltb.EQ.-1 )
207 IF( .NOT.upper .AND. .NOT.lsame( uplo,
'L' ) )
THEN
209 ELSE IF( n.LT.0 )
THEN
211 ELSE IF( lda.LT.max( 1, n ) )
THEN
213 ELSE IF( ltb.LT.max( 1, 4*n ) .AND. .NOT.tquery )
THEN
215 ELSE IF( lwork.LT.max( 1, n ) .AND. .NOT.wquery )
THEN
220 CALL xerbla(
'ZHETRF_AA_2STAGE', -info )
226 nb = ilaenv( 1,
'ZHETRF_AA_2STAGE', uplo, n, -1, -1, -1 )
229 tb( 1 ) = max( 1, (3*nb+1)*n )
232 work( 1 ) = max( 1, n*nb )
235 IF( tquery .OR. wquery )
THEN
248 IF( ldtb .LT. 3*nb+1 )
THEN
251 IF( lwork .LT. nb*n )
THEN
285 IF( i .EQ. (j-1) )
THEN
290 CALL zgemm(
'NoTranspose',
'NoTranspose',
292 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
293 $ a( (i-1)*nb+1, j*nb+1 ), lda,
294 $ zero, work( i*nb+1 ), n )
297 IF( i .EQ. (j-1) )
THEN
302 CALL zgemm(
'NoTranspose',
'NoTranspose',
304 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
306 $ a( (i-2)*nb+1, j*nb+1 ), lda,
307 $ zero, work( i*nb+1 ), n )
313 CALL zlacpy(
'Upper', kb, kb, a( j*nb+1, j*nb+1 ), lda,
314 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
317 CALL zgemm(
'Conjugate transpose',
'NoTranspose',
319 $ -one, a( 1, j*nb+1 ), lda,
321 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
323 CALL zgemm(
'Conjugate transpose',
'NoTranspose',
325 $ one, a( (j-1)*nb+1, j*nb+1 ), lda,
326 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
327 $ zero, work( 1 ), n )
328 CALL zgemm(
'NoTranspose',
'NoTranspose',
330 $ -one, work( 1 ), n,
331 $ a( (j-2)*nb+1, j*nb+1 ), lda,
332 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
335 CALL zhegst( 1,
'Upper', kb,
336 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
337 $ a( (j-1)*nb+1, j*nb+1 ), lda, iinfo )
343 tb( td+1 + (j*nb+i-1)*ldtb )
344 $ = real( tb( td+1 + (j*nb+i-1)*ldtb ) )
346 tb( td+(k-i)+1 + (j*nb+i-1)*ldtb )
347 $ = dconjg( tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb ) )
357 CALL zgemm(
'NoTranspose',
'NoTranspose',
359 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
360 $ a( (j-1)*nb+1, j*nb+1 ), lda,
361 $ zero, work( j*nb+1 ), n )
363 CALL zgemm(
'NoTranspose',
'NoTranspose',
365 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
367 $ a( (j-2)*nb+1, j*nb+1 ), lda,
368 $ zero, work( j*nb+1 ), n )
373 CALL zgemm(
'Conjugate transpose',
'NoTranspose',
374 $ nb, n-(j+1)*nb, j*nb,
375 $ -one, work( nb+1 ), n,
376 $ a( 1, (j+1)*nb+1 ), lda,
377 $ one, a( j*nb+1, (j+1)*nb+1 ), lda )
383 CALL zcopy( n-(j+1)*nb,
384 $ a( j*nb+k, (j+1)*nb+1 ), lda,
385 $ work( 1+(k-1)*n ), 1 )
390 CALL zgetrf( n-(j+1)*nb, nb,
392 $ ipiv( (j+1)*nb+1 ), iinfo )
403 CALL zcopy( n-k-(j+1)*nb,
404 $ work( k+1+(k-1)*n ), 1,
405 $ a( j*nb+k, (j+1)*nb+k+1 ), lda )
409 CALL zlacgv( k, work( 1+(k-1)*n ), 1 )
414 kb = min(nb, n-(j+1)*nb)
415 CALL zlaset(
'Full', kb, nb, zero, zero,
416 $ tb( td+nb+1 + (j*nb)*ldtb) , ldtb-1 )
417 CALL zlacpy(
'Upper', kb, nb,
419 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
421 CALL ztrsm(
'R',
'U',
'N',
'U', kb, nb, one,
422 $ a( (j-1)*nb+1, j*nb+1 ), lda,
423 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
431 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
432 $ = dconjg( tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb ) )
435 CALL zlaset(
'Lower', kb, nb, zero, one,
436 $ a( j*nb+1, (j+1)*nb+1), lda )
442 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
445 i2 = ipiv( (j+1)*nb+k )
448 CALL zswap( k-1, a( (j+1)*nb+1, i1 ), 1,
449 $ a( (j+1)*nb+1, i2 ), 1 )
451 IF( i2.GT.(i1+1) )
THEN
452 CALL zswap( i2-i1-1, a( i1, i1+1 ), lda,
454 CALL zlacgv( i2-i1-1, a( i1+1, i2 ), 1 )
456 CALL zlacgv( i2-i1, a( i1, i1+1 ), lda )
459 $
CALL zswap( n-i2, a( i1, i2+1 ), lda,
460 $ a( i2, i2+1 ), lda )
463 a( i1, i1 ) = a( i2, i2 )
467 CALL zswap( j*nb, a( 1, i1 ), 1,
488 IF( i .EQ. (j-1) )
THEN
493 CALL zgemm(
'NoTranspose',
'Conjugate transpose',
495 $ one, tb( td+1 + (i*nb)*ldtb ), ldtb-1,
496 $ a( j*nb+1, (i-1)*nb+1 ), lda,
497 $ zero, work( i*nb+1 ), n )
500 IF( i .EQ. (j-1) )
THEN
505 CALL zgemm(
'NoTranspose',
'Conjugate transpose',
507 $ one, tb( td+nb+1 + ((i-1)*nb)*ldtb ),
509 $ a( j*nb+1, (i-2)*nb+1 ), lda,
510 $ zero, work( i*nb+1 ), n )
516 CALL zlacpy(
'Lower', kb, kb, a( j*nb+1, j*nb+1 ), lda,
517 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
520 CALL zgemm(
'NoTranspose',
'NoTranspose',
522 $ -one, a( j*nb+1, 1 ), lda,
524 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
526 CALL zgemm(
'NoTranspose',
'NoTranspose',
528 $ one, a( j*nb+1, (j-1)*nb+1 ), lda,
529 $ tb( td+nb+1 + ((j-1)*nb)*ldtb ), ldtb-1,
530 $ zero, work( 1 ), n )
531 CALL zgemm(
'NoTranspose',
'Conjugate transpose',
533 $ -one, work( 1 ), n,
534 $ a( j*nb+1, (j-2)*nb+1 ), lda,
535 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1 )
538 CALL zhegst( 1,
'Lower', kb,
539 $ tb( td+1 + (j*nb)*ldtb ), ldtb-1,
540 $ a( j*nb+1, (j-1)*nb+1 ), lda, iinfo )
546 tb( td+1 + (j*nb+i-1)*ldtb )
547 $ = real( tb( td+1 + (j*nb+i-1)*ldtb ) )
549 tb( td-(k-(i+1)) + (j*nb+k-1)*ldtb )
550 $ = dconjg( tb( td+(k-i)+1 + (j*nb+i-1)*ldtb ) )
560 CALL zgemm(
'NoTranspose',
561 $
'Conjugate transpose',
563 $ one, tb( td+1 + (j*nb)*ldtb ), ldtb-1,
564 $ a( j*nb+1, (j-1)*nb+1 ), lda,
565 $ zero, work( j*nb+1 ), n )
567 CALL zgemm(
'NoTranspose',
568 $
'Conjugate transpose',
570 $ one, tb( td+nb+1 + ((j-1)*nb)*ldtb ),
572 $ a( j*nb+1, (j-2)*nb+1 ), lda,
573 $ zero, work( j*nb+1 ), n )
578 CALL zgemm(
'NoTranspose',
'NoTranspose',
579 $ n-(j+1)*nb, nb, j*nb,
580 $ -one, a( (j+1)*nb+1, 1 ), lda,
582 $ one, a( (j+1)*nb+1, j*nb+1 ), lda )
587 CALL zgetrf( n-(j+1)*nb, nb,
588 $ a( (j+1)*nb+1, j*nb+1 ), lda,
589 $ ipiv( (j+1)*nb+1 ), iinfo )
596 kb = min(nb, n-(j+1)*nb)
597 CALL zlaset(
'Full', kb, nb, zero, zero,
598 $ tb( td+nb+1 + (j*nb)*ldtb) , ldtb-1 )
599 CALL zlacpy(
'Upper', kb, nb,
600 $ a( (j+1)*nb+1, j*nb+1 ), lda,
601 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
603 CALL ztrsm(
'R',
'L',
'C',
'U', kb, nb, one,
604 $ a( j*nb+1, (j-1)*nb+1 ), lda,
605 $ tb( td+nb+1 + (j*nb)*ldtb ), ldtb-1 )
613 tb( td-nb+k-i+1 + (j*nb+nb+i-1)*ldtb )
614 $ = dconjg( tb( td+nb+i-k+1 + (j*nb+k-1)*ldtb ) )
617 CALL zlaset(
'Upper', kb, nb, zero, one,
618 $ a( (j+1)*nb+1, j*nb+1), lda )
624 ipiv( (j+1)*nb+k ) = ipiv( (j+1)*nb+k ) + (j+1)*nb
627 i2 = ipiv( (j+1)*nb+k )
630 CALL zswap( k-1, a( i1, (j+1)*nb+1 ), lda,
631 $ a( i2, (j+1)*nb+1 ), lda )
633 IF( i2.GT.(i1+1) )
THEN
634 CALL zswap( i2-i1-1, a( i1+1, i1 ), 1,
635 $ a( i2, i1+1 ), lda )
636 CALL zlacgv( i2-i1-1, a( i2, i1+1 ), lda )
638 CALL zlacgv( i2-i1, a( i1+1, i1 ), 1 )
641 $
CALL zswap( n-i2, a( i2+1, i1 ), 1,
645 a( i1, i1 ) = a( i2, i2 )
649 CALL zswap( j*nb, a( i1, 1 ), lda,
664 CALL zgbtrf( n, n, nb, nb, tb, ldtb, ipiv2, info )