1 SUBROUTINE pchettrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
10 INTEGER IA, INFO, JA, LWORK, N
15 COMPLEX A( * ), TAU( * ), WORK( * )
402 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
403 $ mb_, nb_, rsrc_, csrc_, lld_
404 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
405 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
406 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
408 parameter( one = 1.0e0 )
409 COMPLEX Z_ONE, Z_NEGONE, Z_ZERO
410 parameter( z_one = 1.0e0, z_negone = -1.0e0,
413 parameter( zero = 0.0e+0 )
420 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
421 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
422 $ indexa, indexinh, indexinv, inh, inhb, inht,
423 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
424 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
425 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
426 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
427 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
428 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
429 $ pbmin, pbsize, pnb, rowsperproc
430 REAL NORM, SAFMAX, SAFMIN
431 COMPLEX ALPHA, BETA, C, ONEOVERBETA, TOPH, TOPNV,
432 $ toptau, topv, ttoph, ttopv
439 INTEGER IDUM1( 1 ), IDUM2( 1 )
444 EXTERNAL blacs_gridinfo, cgebr2d, cgebs2d, cgemm, cgemv,
445 $ cgerv2d, cgesd2d, cgsum2d,
chk1mat, clamov,
452 INTEGER ICEIL, NUMROC, PJLAENV
454 EXTERNAL lsame, iceil, numroc, pjlaenv, pslamch, scnrm2
457 INTRINSIC aimag,
cmplx, conjg, ichar,
max,
min, mod,
464 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
487 ictxt = desca( ctxt_ )
488 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
490 safmax = sqrt( pslamch( ictxt,
'O' ) ) / n
491 safmin = sqrt( pslamch( ictxt,
'S' ) )
496 IF( nprow.EQ.-1 )
THEN
497 info = -( 600+ctxt_ )
502 pnb = pjlaenv( ictxt, 2,
'PCHETTRD',
'L', 0, 0, 0, 0 )
503 anb = pjlaenv( ictxt, 3,
'PCHETTRD',
'L', 0, 0, 0, 0 )
505 interleave = ( pjlaenv( ictxt, 4,
'PCHETTRD',
'L', 1, 0, 0,
507 twogemms = ( pjlaenv( ictxt, 4,
'PCHETTRD',
'L', 2, 0, 0,
509 balanced = ( pjlaenv( ictxt, 4,
'PCHETTRD',
'L', 3, 0, 0,
512 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
515 upper = lsame( uplo,
'U' )
516 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
533 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
534 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
536 work( 1 ) =
cmplx( lwmin )
537 IF( .NOT.lsame( uplo,
'L' ) )
THEN
539 ELSE IF( ia.NE.1 )
THEN
541 ELSE IF( ja.NE.1 )
THEN
543 ELSE IF( nprow.NE.npcol )
THEN
544 info = -( 600+ctxt_ )
545 ELSE IF( desca( dtype_ ).NE.1 )
THEN
546 info = -( 600+dtype_ )
547 ELSE IF( desca( mb_ ).NE.1 )
THEN
549 ELSE IF( desca( nb_ ).NE.1 )
THEN
551 ELSE IF( desca( rsrc_ ).NE.0 )
THEN
552 info = -( 600+rsrc_ )
553 ELSE IF( desca( csrc_ ).NE.0 )
THEN
554 info = -( 600+csrc_ )
555 ELSE IF( lwork.LT.lwmin )
THEN
560 idum1( 1 ) = ichar(
'U' )
562 idum1( 1 ) = ichar(
'L' )
566 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
571 CALL pxerbla( ictxt,
'PCHETTRD', -info )
583 np = numroc( n, 1, myrow, 0, nprow )
584 nq = numroc( n, 1, mycol, 0, npcol )
595 ictxt = desca( ctxt_ )
616 IF( interleave )
THEN
632 ldv = 4*(
max( npm1, nqm1 ) ) + 2
637 invt = inh + ( anb+1 )*ldv
639 inht = invt + ldv / 2
640 intmp = invt + ldv*( anb+1 )
643 ldv =
max( npm1, nqm1 )
645 inht = inh + ldv*( anb+1 )
646 inv = inht + ldv*( anb+1 )
654 invt = inv + ldv*( anb+1 ) + 1
655 intmp = invt + ldv*( 2*anb )
660 CALL pxerbla( ictxt,
'PCHETTRD', -info )
661 work( 1 ) =
cmplx( lwmin )
677 work( inh+i-1 ) = z_zero
678 work( inv+i-1 ) = z_zero
681 work( inht+i-1 ) = z_zero
690 IF( mycol.GT.myrow )
THEN
696 DO 210 minindex = 1, n - 1, anb
699 maxindex =
min( minindex+anb-1, n )
700 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
701 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
705 inhtb = inht + lijb - 1
706 invtb = invt + lijb - 1
707 inhb = inh + liib - 1
708 invb = inv + liib - 1
713 DO 160 index = minindex,
min( maxindex, n-1 )
715 bindex = index - minindex
720 nxtrow = mod( currow+1, nprow )
721 nxtcol = mod( curcol+1, npcol )
727 IF( myrow.EQ.currow )
THEN
731 IF( mycol.EQ.curcol )
THEN
747 IF( mycol.EQ.curcol )
THEN
749 indexa = lii + ( lij-1 )*lda
750 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
751 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
752 ttoph = conjg( work( inht+lij-1+bindex*ldv ) )
753 ttopv = conjg( topnv )
755 IF( index.GT.1 )
THEN
756 DO 30 i = 0, npm0 - 1
758 a( indexa+i ) = a( indexa+i ) -
759 $ work( indexinv+ldv+i )*ttoph -
760 $ work( indexinh+ldv+i )*ttopv
768 IF( mycol.EQ.curcol )
THEN
772 IF( myrow.EQ.currow )
THEN
773 dtmp( 2 ) = real( a( lii+( lij-1 )*lda ) )
777 IF( myrow.EQ.nxtrow )
THEN
778 dtmp( 3 ) = real( a( liip1+( lij-1 )*lda ) )
779 dtmp( 4 ) = aimag( a( liip1+( lij-1 )*lda ) )
785 norm = scnrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
793 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin )
THEN
798 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
799 CALL sgsum2d( ictxt,
'C',
' ', 5, 1, dtmp, 5, -1,
801 IF( dtmp( 5 ).EQ.zero )
THEN
802 dtmp( 1 ) = sqrt( dtmp( 1 ) )
805 CALL pstreecomb( ictxt,
'C', 1, dtmp, -1, mycol,
812 IF( myrow.EQ.currow .AND. mycol.EQ.curcol )
THEN
813 a( lii+( lij-1 )*lda ) =
cmplx( d( lij ), zero )
817 alpha =
cmplx( dtmp( 3 ), dtmp( 4 ) )
819 norm = sign( norm, real( alpha ) )
821 IF( norm.EQ.zero )
THEN
826 oneoverbeta = 1.0e0 / beta
828 CALL cscal( npm1, oneoverbeta,
829 $ a( liip1+( lij-1 )*lda ), 1 )
832 IF( myrow.EQ.nxtrow )
THEN
833 a( liip1+( lij-1 )*lda ) = z_one
844 DO 40 i = 0, npm1 - 1
845 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
849 IF( mycol.EQ.curcol )
THEN
850 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
851 CALL cgebs2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
852 $ work( inv+liip1-1+bindex*ldv ),
855 CALL cgebr2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
856 $ work( inv+liip1-1+bindex*ldv ),
857 $ npm1+npm1+1, myrow, curcol )
858 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
860 DO 50 i = 0, npm1 - 1
861 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
862 $ 1+bindex*ldv+npm1+i )
865 IF( index.LT.n )
THEN
866 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
867 $ a( liip1+( lij-1 )*lda ) = e( lij )
873 IF( myrow.EQ.mycol )
THEN
874 DO 60 i = 0, npm1 + npm1
875 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
879 CALL cgesd2d( ictxt, npm1+npm1, 1,
880 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
882 CALL cgerv2d( ictxt, nqm1+nqm1, 1,
883 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
887 DO 70 i = 0, nqm1 - 1
888 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
889 $ lijp1-1+bindex*ldv+nqm1+i )
895 IF( index.GT.1 )
THEN
896 DO 90 j = lijp1, lijb - 1
897 DO 80 i = 0, npm1 - 1
899 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
900 $ - work( inv+liip1-1+bindex*ldv+i )*
901 $ conjg( work( inht+j-1+bindex*ldv ) ) -
902 $ work( inh+liip1-1+bindex*ldv+i )*
903 $ conjg( work( invt+j-1+bindex*ldv ) )
920 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
921 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
924 IF( myrow.EQ.mycol )
THEN
925 IF( ltnm1.GT.1 )
THEN
926 CALL ctrmvt(
'L', ltnm1-1,
927 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
928 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
929 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
930 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
931 $ ldv ), 1, work( inht+lijp1-1+( bindex+
935 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
936 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
937 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
938 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
942 $
CALL ctrmvt(
'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
943 $ lda, work( invt+lijp1-1+( bindex+1 )*
944 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
945 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
946 $ ( bindex+1 )*ldv ), 1,
947 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
966 DO 110 i = 1, 2*( bindex+1 )
967 work( intmp-1+i ) = 0
973 rowsperproc = iceil( nqb, npset )
974 myfirstrow =
min( nqb+1, 1+rowsperproc*mysetnum )
975 numrows =
min( rowsperproc, nqb-myfirstrow+1 )
980 CALL cgemv(
'C', numrows, bindex+1, z_one,
981 $ work( inhtb+myfirstrow-1 ), ldv,
982 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
983 $ 1, z_zero, work( intmp ), 1 )
985 CALL cgemv(
'C', numrows, bindex+1, z_one,
986 $ work( invtb+myfirstrow-1 ), ldv,
987 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
988 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
991 CALL cgsum2d( ictxt,
'C',
' ', 2*( bindex+1 ), 1,
992 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
996 CALL cgemv(
'C', nqb, bindex+1, z_one, work( inhtb ),
997 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
998 $ z_zero, work( intmp ), 1 )
1000 CALL cgemv(
'C', nqb, bindex+1, z_one, work( invtb ),
1001 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
1002 $ z_zero, work( intmp+bindex+1 ), 1 )
1011 rowsperproc = iceil( npb, npset )
1012 myfirstrow =
min( npb+1, 1+rowsperproc*mysetnum )
1013 numrows =
min( rowsperproc, npb-myfirstrow+1 )
1015 CALL cgsum2d( ictxt,
'R',
' ', 2*( bindex+1 ), 1,
1016 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1020 IF( index.GT.1. )
THEN
1021 CALL cgemv(
'N', numrows, bindex+1, z_negone,
1022 $ work( invb+myfirstrow-1 ), ldv,
1023 $ work( intmp ), 1, z_one,
1024 $ work( invb+myfirstrow-1+( bindex+1 )*
1028 CALL cgemv(
'N', numrows, bindex+1, z_negone,
1029 $ work( inhb+myfirstrow-1 ), ldv,
1030 $ work( intmp+bindex+1 ), 1, z_one,
1031 $ work( invb+myfirstrow-1+( bindex+1 )*
1037 CALL cgemv(
'N', npb, bindex+1, z_negone, work( invb ),
1038 $ ldv, work( intmp ), 1, z_one,
1039 $ work( invb+( bindex+1 )*ldv ), 1 )
1043 CALL cgemv(
'N', npb, bindex+1, z_negone, work( inhb ),
1044 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1045 $ work( invb+( bindex+1 )*ldv ), 1 )
1052 IF( myrow.EQ.mycol )
THEN
1053 DO 120 i = 0, nqm1 - 1
1054 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1058 CALL cgesd2d( ictxt, nqm1, 1,
1059 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1060 $ nqm1, mycol, myrow )
1061 CALL cgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1065 DO 130 i = 0, npm1 - 1
1066 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1067 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1072 CALL cgsum2d( ictxt,
'R',
' ', npm1, 1,
1073 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1081 IF( mycol.EQ.nxtcol )
THEN
1083 DO 140 i = 0, npm1 - 1
1084 cc( 1 ) = cc( 1 ) + conjg( work( inv+liip1-1+( bindex+
1085 $ 1 )*ldv+i ) )*work( inh+liip1-1+
1086 $ ( bindex+1 )*ldv+i )
1088 IF( myrow.EQ.nxtrow )
THEN
1089 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1090 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1095 CALL cgsum2d( ictxt,
'C',
' ', 3, 1, cc, 3, -1, nxtcol )
1101 topnv = toptau*( topv-c*conjg( toptau ) / 2*toph )
1107 DO 150 i = 0, npm1 - 1
1108 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1109 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*
1110 $ conjg( toptau ) / 2*work( inh+liip1-1+( bindex+1 )*
1122 IF( maxindex.LT.n )
THEN
1124 DO 170 i = 0, npm1 - 1
1125 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1130 IF( .NOT.twogemms )
THEN
1131 IF( interleave )
THEN
1134 CALL clamov(
'A', ltnm1, anb, work( inht+lijp1-1 ),
1135 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1137 CALL clamov(
'A', ltnm1, anb, work( inv+ltlip1-1 ),
1138 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1148 DO 180 pbmin = 1, ltnm1, pnb
1150 pbsize =
min( pnb, ltnm1-pbmin+1 )
1151 pbmax =
min( ltnm1, pbmin+pnb-1 )
1152 CALL cgemm(
'N',
'C', pbsize, pbmax, nbzg, z_negone,
1153 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1154 $ work( invt+lijp1-1 ), ldzg, z_one,
1155 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1157 CALL cgemm(
'N',
'C', pbsize, pbmax, anb, z_negone,
1158 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1159 $ work( inht+lijp1-1 ), ldzg, z_one,
1160 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1166 DO 190 i = 0, npm1 - 1
1167 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1168 work( inh+liip1-1+i ) = work( intmp+i )
1170 DO 200 i = 0, nqm1 - 1
1171 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1181 IF( mycol.EQ.nxtcol )
THEN
1182 IF( myrow.EQ.nxtrow )
THEN
1184 d( nq ) = real( a( np+( nq-1 )*lda ) )
1185 a( np+( nq-1 )*lda ) = d( nq )
1187 CALL sgebs2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1 )
1189 CALL sgebr2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1, nxtrow,
1197 work( 1 ) =
cmplx( lwmin )