1 SUBROUTINE pdsyttrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
10 INTEGER IA, INFO, JA, LWORK, N
14 DOUBLE PRECISION A( * ), D( * ), E( * ), TAU( * ), WORK( * )
401 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
402 $ mb_, nb_, rsrc_, csrc_, lld_
403 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
404 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
405 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407 parameter( one = 1.0d0 )
408 DOUBLE PRECISION Z_ONE, Z_NEGONE, Z_ZERO
409 parameter( z_one = 1.0d0, z_negone = -1.0d0,
411 DOUBLE PRECISION ZERO
412 parameter( zero = 0.0d+0 )
419 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
420 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
421 $ indexa, indexinh, indexinv, inh, inhb, inht,
422 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
423 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
424 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
425 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
426 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
427 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
428 $ pbmin, pbsize, pnb, rowsperproc
429 DOUBLE PRECISION ALPHA, BETA, C, CONJTOPH, CONJTOPV, NORM,
430 $ oneoverbeta, safmax, safmin, toph, topnv,
438 INTEGER IDUM1( 1 ), IDUM2( 1 )
439 DOUBLE PRECISION CC( 3 ), DTMP( 5 )
443 $ dgebs2d, dgemm, dgemv, dgerv2d, dgesd2d,
450 INTEGER ICEIL, NUMROC, PJLAENV
451 DOUBLE PRECISION DNRM2, PDLAMCH
452 EXTERNAL lsame, iceil, numroc, pjlaenv, dnrm2, pdlamch
455 INTRINSIC dble, ichar,
max,
min, mod, sign, sqrt
461 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
484 ictxt = desca( ctxt_ )
485 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
487 safmax = sqrt( pdlamch( ictxt,
'O' ) ) / n
488 safmin = sqrt( pdlamch( ictxt,
'S' ) )
493 IF( nprow.EQ.-1 )
THEN
494 info = -( 600+ctxt_ )
499 pnb = pjlaenv( ictxt, 2,
'PDSYTTRD',
'L', 0, 0, 0, 0 )
500 anb = pjlaenv( ictxt, 3,
'PDSYTTRD',
'L', 0, 0, 0, 0 )
502 interleave = ( pjlaenv( ictxt, 4,
'PDSYTTRD',
'L', 1, 0, 0,
504 twogemms = ( pjlaenv( ictxt, 4,
'PDSYTTRD',
'L', 2, 0, 0,
506 balanced = ( pjlaenv( ictxt, 4,
'PDSYTTRD',
'L', 3, 0, 0,
509 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
512 upper = lsame( uplo,
'U' )
513 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
530 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
531 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
533 work( 1 ) = dble( lwmin )
534 IF( .NOT.lsame( uplo,
'L' ) )
THEN
536 ELSE IF( ia.NE.1 )
THEN
538 ELSE IF( ja.NE.1 )
THEN
540 ELSE IF( nprow.NE.npcol )
THEN
541 info = -( 600+ctxt_ )
542 ELSE IF( desca( dtype_ ).NE.1 )
THEN
543 info = -( 600+dtype_ )
544 ELSE IF( desca( mb_ ).NE.1 )
THEN
546 ELSE IF( desca( nb_ ).NE.1 )
THEN
548 ELSE IF( desca( rsrc_ ).NE.0 )
THEN
549 info = -( 600+rsrc_ )
550 ELSE IF( desca( csrc_ ).NE.0 )
THEN
551 info = -( 600+csrc_ )
552 ELSE IF( lwork.LT.lwmin )
THEN
557 idum1( 1 ) = ichar(
'U' )
559 idum1( 1 ) = ichar(
'L' )
563 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
568 CALL pxerbla( ictxt,
'PDSYTTRD', -info )
580 np = numroc( n, 1, myrow, 0, nprow )
581 nq = numroc( n, 1, mycol, 0, npcol )
592 ictxt = desca( ctxt_ )
613 IF( interleave )
THEN
629 ldv = 4*(
max( npm1, nqm1 ) ) + 2
634 invt = inh + ( anb+1 )*ldv
636 inht = invt + ldv / 2
637 intmp = invt + ldv*( anb+1 )
640 ldv =
max( npm1, nqm1 )
642 inht = inh + ldv*( anb+1 )
643 inv = inht + ldv*( anb+1 )
651 invt = inv + ldv*( anb+1 ) + 1
652 intmp = invt + ldv*( 2*anb )
657 CALL pxerbla( ictxt,
'PDSYTTRD', -info )
658 work( 1 ) = dble( lwmin )
674 work( inh+i-1 ) = z_zero
675 work( inv+i-1 ) = z_zero
678 work( inht+i-1 ) = z_zero
687 IF( mycol.GT.myrow )
THEN
693 DO 210 minindex = 1, n - 1, anb
696 maxindex =
min( minindex+anb-1, n )
697 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
698 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
702 inhtb = inht + lijb - 1
703 invtb = invt + lijb - 1
704 inhb = inh + liib - 1
705 invb = inv + liib - 1
710 DO 160 index = minindex,
min( maxindex, n-1 )
712 bindex = index - minindex
717 nxtrow = mod( currow+1, nprow )
718 nxtcol = mod( curcol+1, npcol )
724 IF( myrow.EQ.currow )
THEN
728 IF( mycol.EQ.curcol )
THEN
744 IF( mycol.EQ.curcol )
THEN
746 indexa = lii + ( lij-1 )*lda
747 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
748 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
749 conjtoph = work( inht+lij-1+bindex*ldv )
752 IF( index.GT.1 )
THEN
753 DO 30 i = 0, npm0 - 1
755 a( indexa+i ) = a( indexa+i ) -
756 $ work( indexinv+ldv+i )*conjtoph -
757 $ work( indexinh+ldv+i )*conjtopv
765 IF( mycol.EQ.curcol )
THEN
769 IF( myrow.EQ.currow )
THEN
770 dtmp( 2 ) = a( lii+( lij-1 )*lda )
774 IF( myrow.EQ.nxtrow )
THEN
775 dtmp( 3 ) = a( liip1+( lij-1 )*lda )
782 norm = dnrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
790 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin )
THEN
795 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
796 CALL dgsum2d( ictxt,
'C',
' ', 5, 1, dtmp, 5, -1,
798 IF( dtmp( 5 ).EQ.zero )
THEN
799 dtmp( 1 ) = sqrt( dtmp( 1 ) )
802 CALL pdtreecomb( ictxt,
'C', 1, dtmp, -1, mycol,
809 IF( myrow.EQ.currow .AND. mycol.EQ.curcol )
THEN
810 a( lii+( lij-1 )*lda ) = d( lij )
816 norm = sign( norm, alpha )
818 IF( norm.EQ.zero )
THEN
823 oneoverbeta = 1.0d0 / beta
825 CALL dscal( npm1, oneoverbeta,
826 $ a( liip1+( lij-1 )*lda ), 1 )
829 IF( myrow.EQ.nxtrow )
THEN
830 a( liip1+( lij-1 )*lda ) = z_one
841 DO 40 i = 0, npm1 - 1
842 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
846 IF( mycol.EQ.curcol )
THEN
847 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
848 CALL dgebs2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
849 $ work( inv+liip1-1+bindex*ldv ),
852 CALL dgebr2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
853 $ work( inv+liip1-1+bindex*ldv ),
854 $ npm1+npm1+1, myrow, curcol )
855 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
857 DO 50 i = 0, npm1 - 1
858 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
859 $ 1+bindex*ldv+npm1+i )
862 IF( index.LT.n )
THEN
863 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
864 $ a( liip1+( lij-1 )*lda ) = e( lij )
870 IF( myrow.EQ.mycol )
THEN
871 DO 60 i = 0, npm1 + npm1
872 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
876 CALL dgesd2d( ictxt, npm1+npm1, 1,
877 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
879 CALL dgerv2d( ictxt, nqm1+nqm1, 1,
880 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
884 DO 70 i = 0, nqm1 - 1
885 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
886 $ lijp1-1+bindex*ldv+nqm1+i )
892 IF( index.GT.1 )
THEN
893 DO 90 j = lijp1, lijb - 1
894 DO 80 i = 0, npm1 - 1
896 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
897 $ - work( inv+liip1-1+bindex*ldv+i )*
898 $ work( inht+j-1+bindex*ldv ) -
899 $ work( inh+liip1-1+bindex*ldv+i )*
900 $ work( invt+j-1+bindex*ldv )
917 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
918 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
921 IF( myrow.EQ.mycol )
THEN
922 IF( ltnm1.GT.1 )
THEN
923 CALL dtrmvt(
'L', ltnm1-1,
924 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
925 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
926 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
927 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
928 $ ldv ), 1, work( inht+lijp1-1+( bindex+
932 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
933 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
934 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
935 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
939 $
CALL dtrmvt(
'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
940 $ lda, work( invt+lijp1-1+( bindex+1 )*
941 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
942 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
943 $ ( bindex+1 )*ldv ), 1,
944 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
963 DO 110 i = 1, 2*( bindex+1 )
964 work( intmp-1+i ) = 0
970 rowsperproc = iceil( nqb, npset )
971 myfirstrow =
min( nqb+1, 1+rowsperproc*mysetnum )
972 numrows =
min( rowsperproc, nqb-myfirstrow+1 )
977 CALL dgemv(
'C', numrows, bindex+1, z_one,
978 $ work( inhtb+myfirstrow-1 ), ldv,
979 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
980 $ 1, z_zero, work( intmp ), 1 )
982 CALL dgemv(
'C', numrows, bindex+1, z_one,
983 $ work( invtb+myfirstrow-1 ), ldv,
984 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
985 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
988 CALL dgsum2d( ictxt,
'C',
' ', 2*( bindex+1 ), 1,
989 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
993 CALL dgemv(
'C', nqb, bindex+1, z_one, work( inhtb ),
994 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
995 $ z_zero, work( intmp ), 1 )
997 CALL dgemv(
'C', nqb, bindex+1, z_one, work( invtb ),
998 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
999 $ z_zero, work( intmp+bindex+1 ), 1 )
1008 rowsperproc = iceil( npb, npset )
1009 myfirstrow =
min( npb+1, 1+rowsperproc*mysetnum )
1010 numrows =
min( rowsperproc, npb-myfirstrow+1 )
1012 CALL dgsum2d( ictxt,
'R',
' ', 2*( bindex+1 ), 1,
1013 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1017 IF( index.GT.1. )
THEN
1018 CALL dgemv(
'N', numrows, bindex+1, z_negone,
1019 $ work( invb+myfirstrow-1 ), ldv,
1020 $ work( intmp ), 1, z_one,
1021 $ work( invb+myfirstrow-1+( bindex+1 )*
1025 CALL dgemv(
'N', numrows, bindex+1, z_negone,
1026 $ work( inhb+myfirstrow-1 ), ldv,
1027 $ work( intmp+bindex+1 ), 1, z_one,
1028 $ work( invb+myfirstrow-1+( bindex+1 )*
1034 CALL dgemv(
'N', npb, bindex+1, z_negone, work( invb ),
1035 $ ldv, work( intmp ), 1, z_one,
1036 $ work( invb+( bindex+1 )*ldv ), 1 )
1040 CALL dgemv(
'N', npb, bindex+1, z_negone, work( inhb ),
1041 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1042 $ work( invb+( bindex+1 )*ldv ), 1 )
1049 IF( myrow.EQ.mycol )
THEN
1050 DO 120 i = 0, nqm1 - 1
1051 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1055 CALL dgesd2d( ictxt, nqm1, 1,
1056 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1057 $ nqm1, mycol, myrow )
1058 CALL dgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1062 DO 130 i = 0, npm1 - 1
1063 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1064 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1069 CALL dgsum2d( ictxt,
'R',
' ', npm1, 1,
1070 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1078 IF( mycol.EQ.nxtcol )
THEN
1080 DO 140 i = 0, npm1 - 1
1081 cc( 1 ) = cc( 1 ) + work( inv+liip1-1+( bindex+1 )*
1082 $ ldv+i )*work( inh+liip1-1+( bindex+1 )*ldv+
1085 IF( myrow.EQ.nxtrow )
THEN
1086 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1087 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1092 CALL dgsum2d( ictxt,
'C',
' ', 3, 1, cc, 3, -1, nxtcol )
1098 topnv = toptau*( topv-c*toptau / 2*toph )
1104 DO 150 i = 0, npm1 - 1
1105 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1106 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*toptau /
1107 $ 2*work( inh+liip1-1+( bindex+1 )*ldv+i ) )
1118 IF( maxindex.LT.n )
THEN
1120 DO 170 i = 0, npm1 - 1
1121 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1126 IF( .NOT.twogemms )
THEN
1127 IF( interleave )
THEN
1130 CALL dlamov(
'A', ltnm1, anb, work( inht+lijp1-1 ),
1131 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1133 CALL dlamov(
'A', ltnm1, anb, work( inv+ltlip1-1 ),
1134 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1144 DO 180 pbmin = 1, ltnm1, pnb
1146 pbsize =
min( pnb, ltnm1-pbmin+1 )
1147 pbmax =
min( ltnm1, pbmin+pnb-1 )
1148 CALL dgemm(
'N',
'C', pbsize, pbmax, nbzg, z_negone,
1149 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1150 $ work( invt+lijp1-1 ), ldzg, z_one,
1151 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1153 CALL dgemm(
'N',
'C', pbsize, pbmax, anb, z_negone,
1154 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1155 $ work( inht+lijp1-1 ), ldzg, z_one,
1156 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1162 DO 190 i = 0, npm1 - 1
1163 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1164 work( inh+liip1-1+i ) = work( intmp+i )
1166 DO 200 i = 0, nqm1 - 1
1167 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1177 IF( mycol.EQ.nxtcol )
THEN
1178 IF( myrow.EQ.nxtrow )
THEN
1180 d( nq ) = a( np+( nq-1 )*lda )
1182 CALL dgebs2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1 )
1184 CALL dgebr2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1, nxtrow,
1192 work( 1 ) = dble( lwmin )