1 SUBROUTINE pzhettrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
10 INTEGER IA, INFO, JA, LWORK, N
14 DOUBLE PRECISION D( * ), E( * )
15 COMPLEX*16 A( * ), TAU( * ), WORK( * )
402 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403 $ mb_, nb_, rsrc_, csrc_, lld_
404 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
408 parameter( one = 1.0d0 )
409 COMPLEX*16 Z_ONE, Z_NEGONE, Z_ZERO
410 parameter( z_one = 1.0d0, z_negone = -1.0d0,
412 DOUBLE PRECISION ZERO
413 parameter( zero = 0.0d+0 )
420 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422 $ indexa, indexinh, indexinv, inh, inhb, inht,
423 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
424 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
425 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
426 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
427 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
428 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
429 $ pbmin, pbsize, pnb, rowsperproc
430 DOUBLE PRECISION NORM, SAFMAX, SAFMIN
431 COMPLEX*16 ALPHA, BETA, C, CONJTOPH, CONJTOPV,
432 $ oneoverbeta, toph, topnv, toptau, topv
439 INTEGER IDUM1( 1 ), IDUM2( 1 )
440 DOUBLE PRECISION DTMP( 5 )
446 $
pxerbla, zgebr2d, zgebs2d, zgemm, zgemv,
447 $ zgerv2d, zgesd2d, zgsum2d, zlamov, zscal,
453 INTEGER ICEIL, NUMROC, PJLAENV
454 DOUBLE PRECISION DZNRM2, PDLAMCH
455 EXTERNAL lsame, iceil, numroc, pjlaenv, dznrm2, pdlamch
458 INTRINSIC dble, dcmplx, dconjg, dimag, ichar,
max,
min,
465 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
488 ictxt = desca( ctxt_ )
489 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
491 safmax = sqrt( pdlamch( ictxt,
'O' ) ) / n
492 safmin = sqrt( pdlamch( ictxt,
'S' ) )
497 IF( nprow.EQ.-1 )
THEN
498 info = -( 600+ctxt_ )
503 pnb = pjlaenv( ictxt, 2,
'PZHETTRD',
'L', 0, 0, 0, 0 )
504 anb = pjlaenv( ictxt, 3,
'PZHETTRD',
'L', 0, 0, 0, 0 )
506 interleave = ( pjlaenv( ictxt, 4,
'PZHETTRD',
'L', 1, 0, 0,
508 twogemms = ( pjlaenv( ictxt, 4,
'PZHETTRD',
'L', 2, 0, 0,
510 balanced = ( pjlaenv( ictxt, 4,
'PZHETTRD',
'L', 3, 0, 0,
513 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
516 upper = lsame( uplo,
'U' )
517 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
534 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
535 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
537 work( 1 ) = dcmplx( lwmin )
538 IF( .NOT.lsame( uplo,
'L' ) )
THEN
540 ELSE IF( ia.NE.1 )
THEN
542 ELSE IF( ja.NE.1 )
THEN
544 ELSE IF( nprow.NE.npcol )
THEN
545 info = -( 600+ctxt_ )
546 ELSE IF( desca( dtype_ ).NE.1 )
THEN
547 info = -( 600+dtype_ )
548 ELSE IF( desca( mb_ ).NE.1 )
THEN
550 ELSE IF( desca( nb_ ).NE.1 )
THEN
552 ELSE IF( desca( rsrc_ ).NE.0 )
THEN
553 info = -( 600+rsrc_ )
554 ELSE IF( desca( csrc_ ).NE.0 )
THEN
555 info = -( 600+csrc_ )
556 ELSE IF( lwork.LT.lwmin )
THEN
561 idum1( 1 ) = ichar(
'U' )
563 idum1( 1 ) = ichar(
'L' )
567 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
572 CALL pxerbla( ictxt,
'PZHETTRD', -info )
584 np = numroc( n, 1, myrow, 0, nprow )
585 nq = numroc( n, 1, mycol, 0, npcol )
596 ictxt = desca( ctxt_ )
617 IF( interleave )
THEN
633 ldv = 4*(
max( npm1, nqm1 ) ) + 2
638 invt = inh + ( anb+1 )*ldv
640 inht = invt + ldv / 2
641 intmp = invt + ldv*( anb+1 )
644 ldv =
max( npm1, nqm1 )
646 inht = inh + ldv*( anb+1 )
647 inv = inht + ldv*( anb+1 )
655 invt = inv + ldv*( anb+1 ) + 1
656 intmp = invt + ldv*( 2*anb )
661 CALL pxerbla( ictxt,
'PZHETTRD', -info )
662 work( 1 ) = dcmplx( lwmin )
678 work( inh+i-1 ) = z_zero
679 work( inv+i-1 ) = z_zero
682 work( inht+i-1 ) = z_zero
691 IF( mycol.GT.myrow )
THEN
697 DO 210 minindex = 1, n - 1, anb
700 maxindex =
min( minindex+anb-1, n )
701 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
702 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
706 inhtb = inht + lijb - 1
707 invtb = invt + lijb - 1
708 inhb = inh + liib - 1
709 invb = inv + liib - 1
714 DO 160 index = minindex,
min( maxindex, n-1 )
716 bindex = index - minindex
721 nxtrow = mod( currow+1, nprow )
722 nxtcol = mod( curcol+1, npcol )
728 IF( myrow.EQ.currow )
THEN
732 IF( mycol.EQ.curcol )
THEN
748 IF( mycol.EQ.curcol )
THEN
750 indexa = lii + ( lij-1 )*lda
751 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
752 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
753 conjtoph = dconjg( work( inht+lij-1+bindex*ldv ) )
754 conjtopv = dconjg( topnv )
756 IF( index.GT.1 )
THEN
757 DO 30 i = 0, npm0 - 1
759 a( indexa+i ) = a( indexa+i ) -
760 $ work( indexinv+ldv+i )*conjtoph -
761 $ work( indexinh+ldv+i )*conjtopv
769 IF( mycol.EQ.curcol )
THEN
773 IF( myrow.EQ.currow )
THEN
774 dtmp( 2 ) = dble( a( lii+( lij-1 )*lda ) )
778 IF( myrow.EQ.nxtrow )
THEN
779 dtmp( 3 ) = dble( a( liip1+( lij-1 )*lda ) )
780 dtmp( 4 ) = dimag( a( liip1+( lij-1 )*lda ) )
786 norm = dznrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
794 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin )
THEN
799 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
800 CALL dgsum2d( ictxt,
'C',
' ', 5, 1, dtmp, 5, -1,
802 IF( dtmp( 5 ).EQ.zero )
THEN
803 dtmp( 1 ) = sqrt( dtmp( 1 ) )
806 CALL pdtreecomb( ictxt,
'C', 1, dtmp, -1, mycol,
813 IF( myrow.EQ.currow .AND. mycol.EQ.curcol )
THEN
814 a( lii+( lij-1 )*lda ) = dcmplx( d( lij ), zero )
818 alpha = dcmplx( dtmp( 3 ), dtmp( 4 ) )
820 norm = sign( norm, dble( alpha ) )
822 IF( norm.EQ.zero )
THEN
827 oneoverbeta = 1.0d0 / beta
829 CALL zscal( npm1, oneoverbeta,
830 $ a( liip1+( lij-1 )*lda ), 1 )
833 IF( myrow.EQ.nxtrow )
THEN
834 a( liip1+( lij-1 )*lda ) = z_one
845 DO 40 i = 0, npm1 - 1
846 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
850 IF( mycol.EQ.curcol )
THEN
851 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
852 CALL zgebs2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
853 $ work( inv+liip1-1+bindex*ldv ),
856 CALL zgebr2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
857 $ work( inv+liip1-1+bindex*ldv ),
858 $ npm1+npm1+1, myrow, curcol )
859 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
861 DO 50 i = 0, npm1 - 1
862 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
863 $ 1+bindex*ldv+npm1+i )
866 IF( index.LT.n )
THEN
867 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
868 $ a( liip1+( lij-1 )*lda ) = e( lij )
874 IF( myrow.EQ.mycol )
THEN
875 DO 60 i = 0, npm1 + npm1
876 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
880 CALL zgesd2d( ictxt, npm1+npm1, 1,
881 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
883 CALL zgerv2d( ictxt, nqm1+nqm1, 1,
884 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
888 DO 70 i = 0, nqm1 - 1
889 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
890 $ lijp1-1+bindex*ldv+nqm1+i )
896 IF( index.GT.1 )
THEN
897 DO 90 j = lijp1, lijb - 1
898 DO 80 i = 0, npm1 - 1
900 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
901 $ - work( inv+liip1-1+bindex*ldv+i )*
902 $ dconjg( work( inht+j-1+bindex*ldv ) ) -
903 $ work( inh+liip1-1+bindex*ldv+i )*
904 $ dconjg( work( invt+j-1+bindex*ldv ) )
921 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
922 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
925 IF( myrow.EQ.mycol )
THEN
926 IF( ltnm1.GT.1 )
THEN
927 CALL ztrmvt(
'L', ltnm1-1,
928 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
929 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
930 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
931 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
932 $ ldv ), 1, work( inht+lijp1-1+( bindex+
936 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
937 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
938 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
939 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
943 $
CALL ztrmvt(
'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
944 $ lda, work( invt+lijp1-1+( bindex+1 )*
945 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
946 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
947 $ ( bindex+1 )*ldv ), 1,
948 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
967 DO 110 i = 1, 2*( bindex+1 )
968 work( intmp-1+i ) = 0
974 rowsperproc = iceil( nqb, npset )
975 myfirstrow =
min( nqb+1, 1+rowsperproc*mysetnum )
976 numrows =
min( rowsperproc, nqb-myfirstrow+1 )
981 CALL zgemv(
'C', numrows, bindex+1, z_one,
982 $ work( inhtb+myfirstrow-1 ), ldv,
983 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
984 $ 1, z_zero, work( intmp ), 1 )
986 CALL zgemv(
'C', numrows, bindex+1, z_one,
987 $ work( invtb+myfirstrow-1 ), ldv,
988 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
989 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
992 CALL zgsum2d( ictxt,
'C',
' ', 2*( bindex+1 ), 1,
993 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
997 CALL zgemv(
'C', nqb, bindex+1, z_one, work( inhtb ),
998 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
999 $ z_zero, work( intmp ), 1 )
1001 CALL zgemv(
'C', nqb, bindex+1, z_one, work( invtb ),
1002 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
1003 $ z_zero, work( intmp+bindex+1 ), 1 )
1012 rowsperproc = iceil( npb, npset )
1013 myfirstrow =
min( npb+1, 1+rowsperproc*mysetnum )
1014 numrows =
min( rowsperproc, npb-myfirstrow+1 )
1016 CALL zgsum2d( ictxt,
'R',
' ', 2*( bindex+1 ), 1,
1017 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1021 IF( index.GT.1. )
THEN
1022 CALL zgemv(
'N', numrows, bindex+1, z_negone,
1023 $ work( invb+myfirstrow-1 ), ldv,
1024 $ work( intmp ), 1, z_one,
1025 $ work( invb+myfirstrow-1+( bindex+1 )*
1029 CALL zgemv(
'N', numrows, bindex+1, z_negone,
1030 $ work( inhb+myfirstrow-1 ), ldv,
1031 $ work( intmp+bindex+1 ), 1, z_one,
1032 $ work( invb+myfirstrow-1+( bindex+1 )*
1038 CALL zgemv(
'N', npb, bindex+1, z_negone, work( invb ),
1039 $ ldv, work( intmp ), 1, z_one,
1040 $ work( invb+( bindex+1 )*ldv ), 1 )
1044 CALL zgemv(
'N', npb, bindex+1, z_negone, work( inhb ),
1045 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1046 $ work( invb+( bindex+1 )*ldv ), 1 )
1053 IF( myrow.EQ.mycol )
THEN
1054 DO 120 i = 0, nqm1 - 1
1055 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1059 CALL zgesd2d( ictxt, nqm1, 1,
1060 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1061 $ nqm1, mycol, myrow )
1062 CALL zgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1066 DO 130 i = 0, npm1 - 1
1067 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1068 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1073 CALL zgsum2d( ictxt,
'R',
' ', npm1, 1,
1074 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1082 IF( mycol.EQ.nxtcol )
THEN
1084 DO 140 i = 0, npm1 - 1
1085 cc( 1 ) = cc( 1 ) + dconjg( work( inv+liip1-1+
1086 $ ( bindex+1 )*ldv+i ) )*
1087 $ work( inh+liip1-1+( bindex+1 )*ldv+i )
1089 IF( myrow.EQ.nxtrow )
THEN
1090 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1091 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1096 CALL zgsum2d( ictxt,
'C',
' ', 3, 1, cc, 3, -1, nxtcol )
1102 topnv = toptau*( topv-c*dconjg( toptau ) / 2*toph )
1108 DO 150 i = 0, npm1 - 1
1109 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1110 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*
1111 $ dconjg( toptau ) / 2*work( inh+liip1-1+( bindex+
1123 IF( maxindex.LT.n )
THEN
1125 DO 170 i = 0, npm1 - 1
1126 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1131 IF( .NOT.twogemms )
THEN
1132 IF( interleave )
THEN
1135 CALL zlamov(
'A', ltnm1, anb, work( inht+lijp1-1 ),
1136 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1138 CALL zlamov(
'A', ltnm1, anb, work( inv+ltlip1-1 ),
1139 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1149 DO 180 pbmin = 1, ltnm1, pnb
1151 pbsize =
min( pnb, ltnm1-pbmin+1 )
1152 pbmax =
min( ltnm1, pbmin+pnb-1 )
1153 CALL zgemm(
'N',
'C', pbsize, pbmax, nbzg, z_negone,
1154 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1155 $ work( invt+lijp1-1 ), ldzg, z_one,
1156 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1158 CALL zgemm(
'N',
'C', pbsize, pbmax, anb, z_negone,
1159 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1160 $ work( inht+lijp1-1 ), ldzg, z_one,
1161 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1167 DO 190 i = 0, npm1 - 1
1168 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1169 work( inh+liip1-1+i ) = work( intmp+i )
1171 DO 200 i = 0, nqm1 - 1
1172 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1182 IF( mycol.EQ.nxtcol )
THEN
1183 IF( myrow.EQ.nxtrow )
THEN
1185 d( nq ) = dble( a( np+( nq-1 )*lda ) )
1186 a( np+( nq-1 )*lda ) = d( nq )
1188 CALL dgebs2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1 )
1190 CALL dgebr2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1, nxtrow,
1198 work( 1 ) = dcmplx( lwmin )