1 SUBROUTINE pssyttrd( UPLO, N, A, IA, JA, DESCA, D, E, TAU, WORK,
10 INTEGER IA, INFO, JA, LWORK, N
14 REAL A( * ), D( * ), E( * ), TAU( * ), WORK( * )
401 INTEGER BLOCK_CYCLIC_2D, DLEN_, DTYPE_, CTXT_, M_, N_,
402 $ mb_, nb_, rsrc_, csrc_, lld_
403 parameter( block_cyclic_2d = 1, dlen_ = 9, dtype_ = 1,
404 $ ctxt_ = 2, m_ = 3, n_ = 4, mb_ = 5, nb_ = 6,
405 $ rsrc_ = 7, csrc_ = 8, lld_ = 9 )
407 parameter( one = 1.0e0 )
408 REAL Z_ONE, Z_NEGONE, Z_ZERO
409 parameter( z_one = 1.0e0, z_negone = -1.0e0,
412 parameter( zero = 0.0e+0 )
419 LOGICAL BALANCED, INTERLEAVE, TWOGEMMS, UPPER
420 INTEGER ANB, BINDEX, CURCOL, CURROW, I, ICTXT, INDEX,
421 $ indexa, indexinh, indexinv, inh, inhb, inht,
422 $ inhtb, intmp, inv, invb, invt, invtb, j, lda,
423 $ ldv, ldzg, lii, liib, liip1, lij, lijb, lijp1,
424 $ ltlip1, ltnm1, lwmin, maxindex, minindex,
425 $ mycol, myfirstrow, myrow, mysetnum, nbzg, np,
426 $ npb, npcol, npm0, npm1, nprow, nps, npset, nq,
427 $ nqb, nqm1, numrows, nxtcol, nxtrow, pbmax,
428 $ pbmin, pbsize, pnb, rowsperproc
429 REAL ALPHA, BETA, C, NORM, ONEOVERBETA, SAFMAX,
430 $ safmin, toph, topnv, toptau, topv, ttoph, ttopv
437 INTEGER IDUM1( 1 ), IDUM2( 1 )
438 REAL CC( 3 ), DTMP( 5 )
443 $ sgemv, sgerv2d, sgesd2d, sgsum2d, slamov,
449 INTEGER ICEIL, NUMROC, PJLAENV
451 EXTERNAL lsame, iceil, numroc, pjlaenv, pslamch, snrm2
454 INTRINSIC ichar,
max,
min, mod, real, sign, sqrt
460 IF( block_cyclic_2d*csrc_*ctxt_*dlen_*dtype_*lld_*mb_*m_*nb_*n_*
483 ictxt = desca( ctxt_ )
484 CALL blacs_gridinfo( ictxt, nprow, npcol, myrow, mycol )
486 safmax = sqrt( pslamch( ictxt,
'O' ) ) / n
487 safmin = sqrt( pslamch( ictxt,
'S' ) )
492 IF( nprow.EQ.-1 )
THEN
493 info = -( 600+ctxt_ )
498 pnb = pjlaenv( ictxt, 2,
'PSSYTTRD',
'L', 0, 0, 0, 0 )
499 anb = pjlaenv( ictxt, 3,
'PSSYTTRD',
'L', 0, 0, 0, 0 )
501 interleave = ( pjlaenv( ictxt, 4,
'PSSYTTRD',
'L', 1, 0, 0,
503 twogemms = ( pjlaenv( ictxt, 4,
'PSSYTTRD',
'L', 2, 0, 0,
505 balanced = ( pjlaenv( ictxt, 4,
'PSSYTTRD',
'L', 3, 0, 0,
508 CALL chk1mat( n, 2, n, 2, ia, ja, desca, 6, info )
511 upper = lsame( uplo,
'U' )
512 IF( info.EQ.0 .AND. desca( nb_ ).NE.1 )
529 nps =
max( numroc( n, 1, 0, 0, nprow ), 2*anb )
530 lwmin = 2*( anb+1 )*( 4*nps+2 ) + nps
532 work( 1 ) = real( lwmin )
533 IF( .NOT.lsame( uplo,
'L' ) )
THEN
535 ELSE IF( ia.NE.1 )
THEN
537 ELSE IF( ja.NE.1 )
THEN
539 ELSE IF( nprow.NE.npcol )
THEN
540 info = -( 600+ctxt_ )
541 ELSE IF( desca( dtype_ ).NE.1 )
THEN
542 info = -( 600+dtype_ )
543 ELSE IF( desca( mb_ ).NE.1 )
THEN
545 ELSE IF( desca( nb_ ).NE.1 )
THEN
547 ELSE IF( desca( rsrc_ ).NE.0 )
THEN
548 info = -( 600+rsrc_ )
549 ELSE IF( desca( csrc_ ).NE.0 )
THEN
550 info = -( 600+csrc_ )
551 ELSE IF( lwork.LT.lwmin )
THEN
556 idum1( 1 ) = ichar(
'U' )
558 idum1( 1 ) = ichar(
'L' )
562 CALL pchk1mat( n, 2, n, 2, ia, ja, desca, 6, 1, idum1, idum2,
567 CALL pxerbla( ictxt,
'PSSYTTRD', -info )
579 np = numroc( n, 1, myrow, 0, nprow )
580 nq = numroc( n, 1, mycol, 0, npcol )
591 ictxt = desca( ctxt_ )
612 IF( interleave )
THEN
628 ldv = 4*(
max( npm1, nqm1 ) ) + 2
633 invt = inh + ( anb+1 )*ldv
635 inht = invt + ldv / 2
636 intmp = invt + ldv*( anb+1 )
639 ldv =
max( npm1, nqm1 )
641 inht = inh + ldv*( anb+1 )
642 inv = inht + ldv*( anb+1 )
650 invt = inv + ldv*( anb+1 ) + 1
651 intmp = invt + ldv*( 2*anb )
656 CALL pxerbla( ictxt,
'PSSYTTRD', -info )
657 work( 1 ) = real( lwmin )
673 work( inh+i-1 ) = z_zero
674 work( inv+i-1 ) = z_zero
677 work( inht+i-1 ) = z_zero
686 IF( mycol.GT.myrow )
THEN
692 DO 210 minindex = 1, n - 1, anb
695 maxindex =
min( minindex+anb-1, n )
696 lijb = numroc( maxindex, 1, mycol, 0, npcol ) + 1
697 liib = numroc( maxindex, 1, myrow, 0, nprow ) + 1
701 inhtb = inht + lijb - 1
702 invtb = invt + lijb - 1
703 inhb = inh + liib - 1
704 invb = inv + liib - 1
709 DO 160 index = minindex,
min( maxindex, n-1 )
711 bindex = index - minindex
716 nxtrow = mod( currow+1, nprow )
717 nxtcol = mod( curcol+1, npcol )
723 IF( myrow.EQ.currow )
THEN
727 IF( mycol.EQ.curcol )
THEN
743 IF( mycol.EQ.curcol )
THEN
745 indexa = lii + ( lij-1 )*lda
746 indexinv = inv + lii - 1 + ( bindex-1 )*ldv
747 indexinh = inh + lii - 1 + ( bindex-1 )*ldv
748 ttoph = work( inht+lij-1+bindex*ldv )
751 IF( index.GT.1 )
THEN
752 DO 30 i = 0, npm0 - 1
754 a( indexa+i ) = a( indexa+i ) -
755 $ work( indexinv+ldv+i )*ttoph -
756 $ work( indexinh+ldv+i )*ttopv
764 IF( mycol.EQ.curcol )
THEN
768 IF( myrow.EQ.currow )
THEN
769 dtmp( 2 ) = a( lii+( lij-1 )*lda )
773 IF( myrow.EQ.nxtrow )
THEN
774 dtmp( 3 ) = a( liip1+( lij-1 )*lda )
781 norm = snrm2( npm1, a( liip1+( lij-1 )*lda ), 1 )
789 IF( dtmp( 1 ).GE.safmax .OR. dtmp( 1 ).LT.safmin )
THEN
794 dtmp( 1 ) = dtmp( 1 )*dtmp( 1 )
795 CALL sgsum2d( ictxt,
'C',
' ', 5, 1, dtmp, 5, -1,
797 IF( dtmp( 5 ).EQ.zero )
THEN
798 dtmp( 1 ) = sqrt( dtmp( 1 ) )
801 CALL pstreecomb( ictxt,
'C', 1, dtmp, -1, mycol,
808 IF( myrow.EQ.currow .AND. mycol.EQ.curcol )
THEN
809 a( lii+( lij-1 )*lda ) = d( lij )
815 norm = sign( norm, alpha )
817 IF( norm.EQ.zero )
THEN
822 oneoverbeta = 1.0e0 / beta
824 CALL sscal( npm1, oneoverbeta,
825 $ a( liip1+( lij-1 )*lda ), 1 )
828 IF( myrow.EQ.nxtrow )
THEN
829 a( liip1+( lij-1 )*lda ) = z_one
840 DO 40 i = 0, npm1 - 1
841 work( inv+liip1-1+bindex*ldv+npm1+i ) = a( liip1+i+
845 IF( mycol.EQ.curcol )
THEN
846 work( inv+liip1-1+bindex*ldv+npm1+npm1 ) = toptau
847 CALL sgebs2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
848 $ work( inv+liip1-1+bindex*ldv ),
851 CALL sgebr2d( ictxt,
'R',
' ', npm1+npm1+1, 1,
852 $ work( inv+liip1-1+bindex*ldv ),
853 $ npm1+npm1+1, myrow, curcol )
854 toptau = work( inv+liip1-1+bindex*ldv+npm1+npm1 )
856 DO 50 i = 0, npm1 - 1
857 work( inh+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
858 $ 1+bindex*ldv+npm1+i )
861 IF( index.LT.n )
THEN
862 IF( myrow.EQ.nxtrow .AND. mycol.EQ.curcol )
863 $ a( liip1+( lij-1 )*lda ) = e( lij )
869 IF( myrow.EQ.mycol )
THEN
870 DO 60 i = 0, npm1 + npm1
871 work( invt+lijp1-1+bindex*ldv+i ) = work( inv+liip1-1+
875 CALL sgesd2d( ictxt, npm1+npm1, 1,
876 $ work( inv+liip1-1+bindex*ldv ), npm1+npm1,
878 CALL sgerv2d( ictxt, nqm1+nqm1, 1,
879 $ work( invt+lijp1-1+bindex*ldv ), nqm1+nqm1,
883 DO 70 i = 0, nqm1 - 1
884 work( inht+lijp1-1+( bindex+1 )*ldv+i ) = work( invt+
885 $ lijp1-1+bindex*ldv+nqm1+i )
891 IF( index.GT.1 )
THEN
892 DO 90 j = lijp1, lijb - 1
893 DO 80 i = 0, npm1 - 1
895 a( liip1+i+( j-1 )*lda ) = a( liip1+i+( j-1 )*lda )
896 $ - work( inv+liip1-1+bindex*ldv+i )*
897 $ work( inht+j-1+bindex*ldv ) -
898 $ work( inh+liip1-1+bindex*ldv+i )*
899 $ work( invt+j-1+bindex*ldv )
916 work( inv+liip1-1+( bindex+1 )*ldv ) = z_zero
917 work( invt+lijp1-1+( bindex+1 )*ldv+nqm1-1 ) = z_zero
920 IF( myrow.EQ.mycol )
THEN
921 IF( ltnm1.GT.1 )
THEN
922 CALL strmvt(
'L', ltnm1-1,
923 $ a( ltlip1+1+( lijp1-1 )*lda ), lda,
924 $ work( invt+lijp1-1+( bindex+1 )*ldv ), 1,
925 $ work( inh+ltlip1+1-1+( bindex+1 )*ldv ),
926 $ 1, work( inv+ltlip1+1-1+( bindex+1 )*
927 $ ldv ), 1, work( inht+lijp1-1+( bindex+
931 work( invt+lijp1+i-1-1+( bindex+1 )*ldv )
932 $ = work( invt+lijp1+i-1-1+( bindex+1 )*ldv ) +
933 $ a( ltlip1+i-1+( lijp1+i-1-1 )*lda )*
934 $ work( inh+ltlip1+i-1-1+( bindex+1 )*ldv )
938 $
CALL strmvt(
'L', ltnm1, a( ltlip1+( lijp1-1 )*lda ),
939 $ lda, work( invt+lijp1-1+( bindex+1 )*
940 $ ldv ), 1, work( inh+ltlip1-1+( bindex+
941 $ 1 )*ldv ), 1, work( inv+ltlip1-1+
942 $ ( bindex+1 )*ldv ), 1,
943 $ work( inht+lijp1-1+( bindex+1 )*ldv ),
962 DO 110 i = 1, 2*( bindex+1 )
963 work( intmp-1+i ) = 0
969 rowsperproc = iceil( nqb, npset )
970 myfirstrow =
min( nqb+1, 1+rowsperproc*mysetnum )
971 numrows =
min( rowsperproc, nqb-myfirstrow+1 )
976 CALL sgemv(
'C', numrows, bindex+1, z_one,
977 $ work( inhtb+myfirstrow-1 ), ldv,
978 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
979 $ 1, z_zero, work( intmp ), 1 )
981 CALL sgemv(
'C', numrows, bindex+1, z_one,
982 $ work( invtb+myfirstrow-1 ), ldv,
983 $ work( inhtb+myfirstrow-1+( bindex+1 )*ldv ),
984 $ 1, z_zero, work( intmp+bindex+1 ), 1 )
987 CALL sgsum2d( ictxt,
'C',
' ', 2*( bindex+1 ), 1,
988 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
992 CALL sgemv(
'C', nqb, bindex+1, z_one, work( inhtb ),
993 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
994 $ z_zero, work( intmp ), 1 )
996 CALL sgemv(
'C', nqb, bindex+1, z_one, work( invtb ),
997 $ ldv, work( inhtb+( bindex+1 )*ldv ), 1,
998 $ z_zero, work( intmp+bindex+1 ), 1 )
1007 rowsperproc = iceil( npb, npset )
1008 myfirstrow =
min( npb+1, 1+rowsperproc*mysetnum )
1009 numrows =
min( rowsperproc, npb-myfirstrow+1 )
1011 CALL sgsum2d( ictxt,
'R',
' ', 2*( bindex+1 ), 1,
1012 $ work( intmp ), 2*( bindex+1 ), -1, -1 )
1016 IF( index.GT.1. )
THEN
1017 CALL sgemv(
'N', numrows, bindex+1, z_negone,
1018 $ work( invb+myfirstrow-1 ), ldv,
1019 $ work( intmp ), 1, z_one,
1020 $ work( invb+myfirstrow-1+( bindex+1 )*
1024 CALL sgemv(
'N', numrows, bindex+1, z_negone,
1025 $ work( inhb+myfirstrow-1 ), ldv,
1026 $ work( intmp+bindex+1 ), 1, z_one,
1027 $ work( invb+myfirstrow-1+( bindex+1 )*
1033 CALL sgemv(
'N', npb, bindex+1, z_negone, work( invb ),
1034 $ ldv, work( intmp ), 1, z_one,
1035 $ work( invb+( bindex+1 )*ldv ), 1 )
1039 CALL sgemv(
'N', npb, bindex+1, z_negone, work( inhb ),
1040 $ ldv, work( intmp+bindex+1 ), 1, z_one,
1041 $ work( invb+( bindex+1 )*ldv ), 1 )
1048 IF( myrow.EQ.mycol )
THEN
1049 DO 120 i = 0, nqm1 - 1
1050 work( intmp+i ) = work( invt+lijp1-1+( bindex+1 )*ldv+
1054 CALL sgesd2d( ictxt, nqm1, 1,
1055 $ work( invt+lijp1-1+( bindex+1 )*ldv ),
1056 $ nqm1, mycol, myrow )
1057 CALL sgerv2d( ictxt, npm1, 1, work( intmp ), npm1, mycol,
1061 DO 130 i = 0, npm1 - 1
1062 work( inv+liip1-1+( bindex+1 )*ldv+i ) = work( inv+liip1-
1063 $ 1+( bindex+1 )*ldv+i ) + work( intmp+i )
1068 CALL sgsum2d( ictxt,
'R',
' ', npm1, 1,
1069 $ work( inv+liip1-1+( bindex+1 )*ldv ), npm1,
1077 IF( mycol.EQ.nxtcol )
THEN
1079 DO 140 i = 0, npm1 - 1
1080 cc( 1 ) = cc( 1 ) + work( inv+liip1-1+( bindex+1 )*
1081 $ ldv+i )*work( inh+liip1-1+( bindex+1 )*ldv+
1084 IF( myrow.EQ.nxtrow )
THEN
1085 cc( 2 ) = work( inv+liip1-1+( bindex+1 )*ldv )
1086 cc( 3 ) = work( inh+liip1-1+( bindex+1 )*ldv )
1091 CALL sgsum2d( ictxt,
'C',
' ', 3, 1, cc, 3, -1, nxtcol )
1097 topnv = toptau*( topv-c*toptau / 2*toph )
1103 DO 150 i = 0, npm1 - 1
1104 work( inv+liip1-1+( bindex+1 )*ldv+i ) = toptau*
1105 $ ( work( inv+liip1-1+( bindex+1 )*ldv+i )-c*toptau /
1106 $ 2*work( inh+liip1-1+( bindex+1 )*ldv+i ) )
1117 IF( maxindex.LT.n )
THEN
1119 DO 170 i = 0, npm1 - 1
1120 work( intmp+i ) = work( inh+liip1-1+anb*ldv+i )
1125 IF( .NOT.twogemms )
THEN
1126 IF( interleave )
THEN
1129 CALL slamov(
'A', ltnm1, anb, work( inht+lijp1-1 ),
1130 $ ldv, work( invt+lijp1-1+anb*ldv ), ldv )
1132 CALL slamov(
'A', ltnm1, anb, work( inv+ltlip1-1 ),
1133 $ ldv, work( inh+ltlip1-1+anb*ldv ), ldv )
1143 DO 180 pbmin = 1, ltnm1, pnb
1145 pbsize =
min( pnb, ltnm1-pbmin+1 )
1146 pbmax =
min( ltnm1, pbmin+pnb-1 )
1147 CALL sgemm(
'N',
'C', pbsize, pbmax, nbzg, z_negone,
1148 $ work( inh+ltlip1-1+pbmin-1 ), ldzg,
1149 $ work( invt+lijp1-1 ), ldzg, z_one,
1150 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1152 CALL sgemm(
'N',
'C', pbsize, pbmax, anb, z_negone,
1153 $ work( inv+ltlip1-1+pbmin-1 ), ldzg,
1154 $ work( inht+lijp1-1 ), ldzg, z_one,
1155 $ a( ltlip1+pbmin-1+( lijp1-1 )*lda ), lda )
1161 DO 190 i = 0, npm1 - 1
1162 work( inv+liip1-1+i ) = work( inv+liip1-1+anb*ldv+i )
1163 work( inh+liip1-1+i ) = work( intmp+i )
1165 DO 200 i = 0, nqm1 - 1
1166 work( inht+lijp1-1+i ) = work( inht+lijp1-1+anb*ldv+i )
1176 IF( mycol.EQ.nxtcol )
THEN
1177 IF( myrow.EQ.nxtrow )
THEN
1179 d( nq ) = a( np+( nq-1 )*lda )
1181 CALL sgebs2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1 )
1183 CALL sgebr2d( ictxt,
'C',
' ', 1, 1, d( nq ), 1, nxtrow,
1191 work( 1 ) = real( lwmin )