The following code is the present version of the symmetric eigenvalue code
based upon rank one tearing and rank one updating.
We are still working on some things but would like to share the code with you
now to get more experience with it and to find out where the problems are.
This code is serial... we have parallel versions for the HEP and for CRAY-XMP-4.
We would appreciate it if you would inform us if (when) you have problems with
the code.  We are particularly concerned with loss of orthogonality in the 
presence of pathologically close eigenvalues.  

Jack Dongarra (dongarra@anl-mcs)
Danny Sorensen (sorensen@anl-mcs)
Mathematics and Computer Science Division
Argonne National Laboratory
Argonne, Illinois 60439

      double precision d(500),e(500),q(500,500),beta(500)
      double precision dd(500),ee(500),qq(500,500),z(500)
      double precision wv1(500),wv2(500),wm1(500,500),wv3(500),wv4(500)
      double precision esave(500),dsave(500)
      double precision s,s2,t,t1,t2,t2t,tnorm,tnorm2,res,res2
      double precision enorm
      integer indx(500),indxp(500)
      integer icase
      integer lda,n,nd2,i,j,ierr,info,k
      real gtime,second,foo,rand
c****************************************************************
c
c      Test driver for parallel symmetric eigenvalue routine
c
c      User must supply real function second     
c
c
c     The following program is an experimental piece of software.
c     It should be more efficient on sequential machines than TQL2
c     from EISPACK in finding the eigenvalues and eigenvectors of
c     a symmetric tridiagonal matrix. On parallel machines it
c     should be far superior.
c     If you encounter problems in the use of this routine or
c     have questions on its implementation for your architecture 
c     please contact us.
c
c     This version dated 5/17/85.
c
c     Jack Dongarra (dongarra@anl-mcs)
c     Danny Sorensen (sorensen@anl-mcs)
c     Mathematics and Computer Science Division
c     Argonne National Laboratory
c     Argonne, Illinois 60439
c
c     312-972-7246 or 312-972-8711
c
c
c****************************************************************
      lda = 500
      do 9999 n = 50,500,50
      write(6,*)'=============================='
      write(6,*)' n = ',n
      do 9998 icase = 1,2
      write(6,*)'++++++++++++++++++++++++++++++'
      if( icase .eq. 1 ) write(6,*)' twos on diagonal'
      if( icase .eq. 2 ) write(6,*)' random numbers on diagonal'
      nd2 = n/2
      go to (112,113), icase
  112 do 13 i = 1,n
         d(i) = 2.0d0
         e(i) = 1.0d0
   13 continue
      go to 444
  113 do 14 i = 1,n
         d(i) = rand(foo)
         e(i) = rand(foo)
   14 continue
  444 do 10 j = 1,n
         do 5 i = 1,n
              q(i,j) = 0.0
              qq(i,j) = 0.0
    5    continue
         q(j,j) = 1.0
         qq(j,j) = 1.0
         dd(j) = d(j)
         ee(j) = e(j)
         dsave(j) = d(j)
         esave(j) = e(j)
   10 continue
      t1 = second(gtime)
      call tql2(lda,n,dd,ee,qq,ierr)
      t2t = second(gtime) - t1
      write(6,*) ' time for tql ',t2t
c
c     the test problem has been defined now comes the numerical 
c     refinements that will avoid cancellation
c
      t1 = second(gtime) 
      call treeql(lda,n,d,e,q,z,beta,wv1,wv2,wv3,wv4,wm1,indxp,
     $            indx,info)
      t2 = second(gtime) - t1
c
      write(6,*) ' time for treeql ',t2
      if( info .gt. 1 ) write(6,*)' deflate from sesupd',info
      if( t2 .ne. 0.0 )write(6,*)' ratio of tql2/new',t2t/t2
      tnorm = 0.0d0
      tnorm2 = 0.0d0
      res = 0.0d0
      res2 = 0.0d0
      do 530 j = 1,n
         e(1) =   dsave(1)*q(1,j) + 
     $          esave(2)*q(2,j) - d(j)*q(1,j)
         ee(1) = dsave(1)*qq(1,j) + 
     $          esave(2)*qq(2,j) - dd(j)*qq(1,j)
      do 400 i = 2,n-1
         e(i) = esave(i)*q(i-1,j) + dsave(i)*q(i,j) + 
     $          esave(i+1)*q(i+1,j) - d(j)*q(i,j)
         ee(i) = esave(i)*qq(i-1,j) + dsave(i)*qq(i,j) + 
     $          esave(i+1)*qq(i+1,j) - dd(j)*qq(i,j)
  400 continue
         e(n) = esave(n)*q(n-1,j) + dsave(n)*q(n,j)  
     $          - d(j)*q(n,j)
         ee(i) = esave(n)*qq(n-1,j) + dsave(n)*qq(n,j)  
     $          - dd(j)*qq(n,j)
      t = enorm(n,e)
      t2 = enorm(n,ee)
      res = dmax1(res,t)
      res2 = dmax1(res2,t2)
c      if (t .gt. .000000001) then 
c      write(6,*)' j ',j, ' d ',d(j),' er ',t,' dd ',dd(j),' eer ',t2
c      write(6,*) '     ev ',q(1,j),' eev ',qq(1,j)
c      endif
         do 520 i = 1,n
            t = 0.0
            s = 0.0
            t2 = 0.0
            s2 = 0.0
            do 510 k = 1,n
               s2 = s2 + qq(k,i)*qq(k,j)
               s = s + q(k,i)*q(k,j)
  510       continue
            if (i .eq. j) s2 = s2 - 1.0
            if (i .eq. j) s = s - 1.0
            t2 =  dabs(s2)
            t =  dabs(s)
         tnorm2 = dmax1(tnorm2,t2)
         tnorm = dmax1(tnorm,t)
c         if (t .gt. .000000001) write(6,*) ' ij ',i,j,'   tnorm  ',tnorm
  520    continue
  530 continue
      write(6,*)' the residual for the tql values and vectors',res2
      write(6,*)' the residual for the updated values and vectors',res
      write(6,*)' the tql norm of q*q sup t  is',tnorm2
      write(6,*)' the upd norm of q*q sup t  is',tnorm
 9998 continue
 9999 continue
      stop
      end
      subroutine treeql(ldq,n,d,e,q,z,beta,wv1,wv2,wv3,wv4,wm1,indxp,
     $            indx,info)
c***********************************************************************
c
c     The following program is an experimental piece of software.
c     It should be more efficient on sequential machines than TQL2
c     from EISPACK in finding the eigenvalues and eigenvectors of
c     a symmetric tridiagonal matrix. On parallel machines it
c     should be far superior.
c     If you encounter problems in the use of this routine or
c     have questions on its implementation for your architecture 
c     please contact us.
c
c     This version dated 5/17/85.
c
c     Jack Dongarra (dongarra@anl-mcs)
c     Danny Sorensen (sorensen@anl-mcs)
c     Mathematics and Computer Science Division
c     Argonne National Laboratory
c     Argonne, Illinois 60439
c
c     312-972-7246 or 312-972-8711
c
c
c     this subroutine will compute the eigensystem of a double precision
c     of a symmetric matrix .
c     are stored in q, and the eigenvalues are in d.
c     the algorithm consists of three stages...
c
c
c        the first stage constists of splitting the problem into
c        several independent subproblems by a succession of rank one
c        tearings yielding a binary tree as a computational graph with
c        the leaves representing the lowest level splits.  There are
c        two independent eigenvalue problems residing at each of the 
c        leaves once the graph is formed.
c
c        the second stage consists of calculating the 
c        eigensystem of the subproblems residing at the leaves.  this 
c        requires calls to the subroutine tql2 from eispack.
c
c        the final stage consists of traversing the tree right to left
c        from deepest level to the root solving rank one eigensystem 
c        updating problems at each node of the graph.
c
c
c
c
c     input variables...
c
c        ldq    the leading dimension of the two dimensional arrays
c
c        n      the dimension of the problem.  q is n x n
c
c        q      an  n x n  matrix that contains the orthogonal similarity
c               transformation used to reduce the original matrix to 
c               tridiagonal form.  on output this matrix will contain the
c               eigenvectors of that matrix.
c
c        d      a vector of length n. the diagonal of the tridiagonal 
c               matrix is contained in d on input.  the eigenvalues are
c               contained in d on input.  
c
c        e      a vector of length n. the subdiagonal of the tridiagonal 
c               matrix is contained in e on input.  
c               e is destroyed on output.
c        
c
c        z      a work vector of length n.  
c
c        beta   a working array of length n.
c
c        wv1    a working array of length n
c
c        wv2    a working array of length n
c
c        wv3    a working array of length n
c
c        wv4    a working array of length n
c
c        wm1    a working array of dimension  n x n
c
c        indxp  an integer array of length n.
c
c        indx   an integer array of length n.
c
c        info   this integer variable indicates failure of the
c               updating process with value -1,  and success
c               with value info .ge. 0. if info .gt. 0 the number
c               of deflations that occurred during the last step
c               is reported.
c
c     called subroutines...
c
c        tql2    a subroutine for calculating the eigensystem of a 
c                symmetric tridiagonal matrix.
c
c        sesupd  a subroutine for calculating the updated eigensystems.
c
c
c***********************************************************************
c
c     declare shared variables 
c
      integer ldq,n,indxp(*),indx(*),info
      double precision d(*),e(*),q(ldq,*),z(*),beta(*)
      double precision wv1(*),wv2(*),wv3(*),wv4(*),wm1(ldq,*)
c      
c     declare local variables 
c
      integer ndiml(50),ndimr(50),inodes(50)
      integer ksect,nsect,klast,i,jleft,kk,jright,klj,j,ndl,ir,il
      integer nbp,jj,ndr,jjj,ierr,ndim
      double precision alpha,alphap
c
      ksect   = 3
c
c     ksect is the number of levels on the tree.   there are 2**ksect
c     nodes and 2**(ksect-1) leaves ( rank one tears) on the tree
c     if 2**ksect .gt. 50 desired then dimensions of ndiml,ndimr,inodes
c     must be changed.
c
      nsect = 2**ksect
c
      if (n .lt. 6*nsect) then
c
c        the subproblems are too small to be efficient
c
         call tql2(ldq,n,d,e,q,ierr)
         return
      endif
c
      do 20 i = 1,n
         z(i) = 0.0
   20 continue
      nbp = n/2
      inodes(1) = nbp + 1 
      ndiml(1) = nbp
      ndimr(1) = n - nbp
      klast = 1
      jleft = 0
      jright = 1
      do 50 kk = 1,ksect-1
c
c        construct the tree... klast is the number of nodes on the
c        kk -th  level of the tree.
c
         do 40 jj = 0,klast-1
            jleft = jleft + 2
            jright = jright + 2
            klj = klast + jj
            ndiml(jleft)  = ndiml(klj)/2
            ndimr(jleft)  = ndiml(klj) - ndiml(jleft)
            inodes(jleft) = inodes(klj) - ndimr(jleft)
            ndiml(jright)  = ndimr(klj)/2
            ndimr(jright)  = ndimr(klj) - ndiml(jright)
            inodes(jright) = inodes(klj) + ndiml(jright)
c
c           inodes contains the index of the split
c           ndiml contains the dimension of problem to the left
c           ndimr contains the dimension of problem to the right
c
   40    continue
         klast = 2*klast
   50 continue
      klast = 2**ksect
      j = klast
      klast = klast/2
      do 90 jj = 1,klast
         j = j-1
         ndl = ndiml(j)  
         ir  = inodes(j) 
         il  = ir - ndl 
         nbp = ir-1
   95    continue
c
c           arrange the tearing so that the cancellation during the 
c           modification of the diagonal elements on either side of the
c           tear does not occur
c
            beta(nbp+1) =  e(nbp+1)
            alpha = d(nbp)
            alphap = d(nbp+1)
            z(nbp) = 1.0d0
            z(nbp+1) = 1.0d0
c
c           the tear is at index nbp.  beta(nbp+2) is used to store the
c           value of z(nbp+1) used for the tear.
c
            beta(nbp+2) = 1.0d0
            if (dsign(1.0d0,alpha)*dsign(1.0d0,alphap) .ge. 0.0d0) 
     *      then
               if (alpha .lt. 0.0d0 .or. alphap .lt. 0.0d0)
     *         then
                  if(beta(nbp+1) .lt. 0.0d0) 
     *            then
                    beta(nbp+1) = -beta(nbp+1)   
                    z(nbp) = 1.0d0
                    z(nbp+1) = -1.0d0
                    beta(nbp+2) = -1.0d0
                  endif
               else
                  if (beta(nbp+1) .gt. 0.0d0)
     *            then
                    beta(nbp+1) = -beta(nbp+1)   
                    z(nbp) = 1.0d0
                    z(nbp+1) = -1.0d0
                    beta(nbp+2) = -1.0d0
                  endif
               endif
            endif 
           d(nbp) = d(nbp) - beta(nbp+1)  
           d(nbp+1) = d(nbp+1) - beta(nbp+1)  
         if (nbp .eq. ir-1) 
     *   then
            nbp = il-1
            if (nbp .gt. 0) go to 95
         endif
   90 continue
c
c     invert the tree and spawn problems
c
      klast = 2**ksect
      j = klast
      klast = klast/2
         do 140 jj = 1,klast
            j = j-1
            ndl = ndiml(j)  
            ndr = ndimr(j)
            ir  = inodes(j) 
            il  = ir - ndl 
c
c           independent tridiagonal eigenvalue problems are solved
c           at each of the leaves.
c
            call tql2(ldq,ndl,d(il),e(il),q(il,il),ierr)
            call tql2(ldq,ndr,d(ir),e(ir),q(ir,ir),ierr)
  140    continue
      klast = 2**ksect
      j = klast
      klast = klast/2
      do 250 kk = 1,ksect
         do 240 jj = 1,klast
c
c           there are klast nodes on this level
c           problems are spawned right to left
c           the dimension of the updating problem is equal to ndl+ndr
c
            j = j-1
            ndl = ndiml(j)  
            ndr = ndimr(j)
            ir  = inodes(j) 
            il  = ir - ndl 
            ndim = ndl+ndr
c
c           reset the z vector
c
            z(ir-1) = 1.0d0
            z(ir) = 1.0d0*beta(ir+1)
            do 222 jjj = il,ir-2
               z(jjj) = 0.0d0
  222       continue
            do 333 jjj = ir+1,ir+ndr-1
               z(jjj) = 0.0d0
  333       continue
c
            call sesupd(ldq,ndim,q(il,il),d(il),beta(ir),z(il),
     *                  wv1(il),wv2(il),wm1(il,il),wv3(il),wv4(il),
     *                                  indxp(il),indx(il),info)
      if( info .lt. 0 ) write(6,*)' error from sesupd',info
  240    continue
         klast = klast/2
  250 continue
      return
c
c     last card of treeql
c
      end
      double precision function enorm(n,x)
      integer n
      double precision x(n)
c     **********
c
c     function enorm
c
c     given an n-vector x, this function calculates the
c     euclidean norm of x.
c
c     the euclidean norm is computed by accumulating the sum of
c     squares in three different sums. the sums of squares for the
c     small and large components are scaled so that no overflows
c     occur. non-destructive underflows are permitted. underflows
c     and overflows do not occur in the computation of the unscaled
c     sum of squares for the intermediate components.
c     the definitions of small, intermediate and large components
c     depend on two constants, rdwarf and rgiant. the main
c     restrictions on these constants are that rdwarf**2 not
c     underflow and rgiant**2 not overflow. the constants
c     given here are suitable for every known computer.
c
c     the function statement is
c
c       double precision function enorm(n,x)
c
c     where
c
c       n is a positive integer input variable.
c
c       x is an input array of length n.
c
c     subprograms called
c
c       fortran-supplied ... dabs,dsqrt
c
c     argonne national laboratory. minpack project. march 1980.
c     burton s. garbow, kenneth e. hillstrom, jorge j. more
c
c     **********
      integer i
      double precision agiant,floatn,one,rdwarf,rgiant,s1,s2,s3,xabs,
     *                 x1max,x3max,zero
      data one,zero,rdwarf,rgiant /1.0d0,0.0d0,3.834d-20,1.304d19/
      s1 = zero
      s2 = zero
      s3 = zero
      x1max = zero
      x3max = zero
      floatn = n
      agiant = rgiant/floatn
      do 90 i = 1, n
         xabs = dabs(x(i))
         if (xabs .gt. rdwarf .and. xabs .lt. agiant) go to 70
            if (xabs .le. rdwarf) go to 30
c
c              sum for large components.
c
               if (xabs .le. x1max) go to 10
                  s1 = one + s1*(x1max/xabs)**2
                  x1max = xabs
                  go to 20
   10          continue
                  s1 = s1 + (xabs/x1max)**2
   20          continue
               go to 60
   30       continue
c
c              sum for small components.
c
               if (xabs .le. x3max) go to 40
                  s3 = one + s3*(x3max/xabs)**2
                  x3max = xabs
                  go to 50
   40          continue
                  if (xabs .ne. zero) s3 = s3 + (xabs/x3max)**2
   50          continue
   60       continue
            go to 80
   70    continue
c
c           sum for intermediate components.
c
            s2 = s2 + xabs**2
   80    continue
   90    continue
c
c     calculation of norm.
c
      if (s1 .eq. zero) go to 100
         enorm = x1max*dsqrt(s1+(s2/x1max)/x1max)
         go to 130
  100 continue
         if (s2 .eq. zero) go to 110
            if (s2 .ge. x3max)
     *         enorm = dsqrt(s2*(one+(x3max/s2)*(x3max*s3)))
            if (s2 .lt. x3max)
     *         enorm = dsqrt(x3max*((s2/x3max)+(x3max*s3)))
            go to 120
  110    continue
            enorm = x3max*dsqrt(s3)
  120    continue
  130 continue
      return
c
c     last card of function enorm.
c
      end
      double precision function epslon (x)
      double precision x
c
c     estimate unit roundoff in quantities of size x.
c
      double precision a,b,c,eps
c
c     this program should function properly on all systems
c     satisfying the following two assumptions,
c        1.  the base used in representing floating point
c            numbers is not a power of three.
c        2.  the quantity  a  in statement 10 is represented to 
c            the accuracy used in floating point variables
c            that are stored in memory.
c     the statement number 10 and the go to 10 are intended to
c     force optimizing compilers to generate code satisfying 
c     assumption 2.
c     under these assumptions, it should be true that,
c            a  is not exactly equal to four-thirds,
c            b  has a zero for its last bit or digit,
c            c  is not exactly equal to one,
c            eps  measures the separation of 1.0 from
c                 the next larger floating point number.
c     the developers of eispack would appreciate being informed
c     about any systems where these assumptions do not hold.
c
c     this version dated 4/6/83.
c
      a = 4.0d0/3.0d0
   10 b = a - 1.0d0
      c = b + b + b
      eps = dabs(c-1.0d0)
      if (eps .eq. 0.0d0) go to 10
      epslon = eps*dabs(x)
      return
      end
      subroutine evupd(n,i,d,z,delta,rho,dlam,ifail)
c***********************************************************************
c
c     The following program is an experimental piece of software.
c     It should be more efficient on sequential machines than TQL2
c     from EISPACK in finding the eigenvalues and eigenvectors of
c     a symmetric tridiagonal matrix. On parallel machines it
c     should be far superior.
c     If you encounter problems in the use of this routine or
c     have questions on its implementation for your architecture 
c     please contact us.
c
c     This version dated 5/17/85.
c
c     Jack Dongarra (dongarra@anl-mcs)
c     Danny Sorensen (sorensen@anl-mcs)
c     Mathematics and Computer Science Division
c     Argonne National Laboratory
c     Argonne, Illinois 60439
c
c     312-972-7246 or 312-972-8711
c
c
c     this subroutine computes the updated eigenvalues of a
c     rank one modification to a symmetric matrix.  it is assumed
c     that the eigenvalues are in the array d, and that
c
c        d(i) .ne. d(j)  for  i .ne. j
c
c     it is also assumed that the eigenvalues are in increasing
c     order and that the value of rho is positive.  this is
c     arranged by the calling subroutine sesupd, and is no loss
c     in generality.
c     it is also assumed that the values in the array z are
c     the squares of the components of the updatingvector.
c
c
c     the method consists of approximating the rational functions
c
c        rho = sum(z(j)/((d(j)-d(i))/rho - lamda): j = i+1,n)
c
c        phi =sum(z(j)/(d(j)-d(i))/rho - lamda): j = 1,i)
c
c     by simple interpolating rational functions.  this avoids
c     the need for safeguarding by bisection since
c     the convergence is monotone, and quadratic from any starting
c     point that is greater than zero but less than the solution
c
c
c     input variables...
c        n      the length of all arrays
c
c        i      the i - th  eigenvalue is computed
c
c        d      the original eigenvalues.  it is assumed that they are
c               in order, d(i) .lt. d(j)  for i .lt. j.
c
c        z      this array of length n contains the squares of
c               of the components of the updating vector
c
c        delta  this array of length n contains (d(j) - lamda(i))/rho
c               in its j - th component.  these values will be used
c               to update the eigenvectors in sesupd.
c
c        rho    this is the scalar in the the symmetric updating
c               formula.
c
c        dlam   this scalar will contain the value of the i - th
c               updated eigenvalue on return
c
c        ifail  this integer variable indicates failure of the
c               updating process with value 1,  and success
c               with value 0.
c
c     subroutines called
c   
c        epslon  determies the machine accuracy
c
c
c**************************************************************************
      integer i,n,im1,ip1,ip2,niter
      dimension d(n),z(n), delta(n)
      double precision d,z,rho,eps,delta,zero,one,two,
     1                 del,phi,dphi,psi,dpsi,lambda,oldlam,
     2                 t,temp,a,b,d1,w,eps,eps1,eta,dlam,dmax
      integer ifail,j
      double precision epslon
c
         zero = 0.0d0
         one  = 1.0d0
         two  = 2.0d0
c
c     eps is machine precision
c     eta is the relative accuracy requirement on the roots.
c
c         these values should be adjusted for the particular machine.
c
         eps = 100*epslon(1.0d0)      
         eta = 8.0*eps 
c
         im1 = i - 1
         ip1 = i + 1
         ip2 = i + 2
         niter = 1
         lambda = zero
         oldlam = zero
         del = d(i)
         do 100 j = 1,n
            delta(j) = (d(j) - del)/rho
  100    continue
         dmax = dmax1(dabs(d(1)), dabs(d(n)))
         dmax = dabs(rho) + dmax
c
c     calculate initial guess
c
         if (i .lt. n) 
     *   then
            del = d(ip1) 
         else
            del = d(n) + rho 
         endif
         a = zero
         do 200 j = 1,im1
            a = a + rho*z(j)/(d(j) - del)
  200    continue
         b = zero
         do 220 j = ip2,n
            b = b + rho*z(j)/(d(j) - del)
  220    continue
         a = a + (b + one)
         if (i .eq. n) 
     *   then
            t = z(i)/a
         else         
            t = a*delta(ip1)
            b = t + z(i) + z(ip1)
            if (b .ge. zero)
     *      then
               t = two*z(i)*delta(ip1)/
     *             (b + dsqrt(dabs(b*b - 4.0d0*t*z(i))))
            else
               t = (b - dsqrt(b*b - 4.0d0*t*z(i)))/(two*a)
            endif
         endif
         if (t .le. zero) write(6,*) ' int ges ',t
c
c     test to see that the initial guess is not too close to endpoint
c
      if (ip1 .le. n .and. t .ge. .9d0*delta(ip1)) 
     *                              t = .9d0*delta(ip1)
c
c     update the values of the array delta
c
  250 continue
         do 300 j = 1,n
             delta(j) = delta(j) - t
  300    continue
         lambda = lambda + t
         dlam = d(i) + rho*lambda
c
c     evaluate psi and the derivative dpsi
c
         dpsi = zero
         psi  = zero
         do 400 j = 1,i
            t = z(j)/delta(j)
            psi = psi + t
            dpsi = dpsi + t/delta(j)
  400    continue
c
c     evaluate phi and the derivative dphi
c
         dphi = zero
         phi  = zero
         if (i .eq. n) go to 600
         do 500 j = ip1,n
            t = z(j)/delta(j)
            phi = phi + t
            dphi = dphi + t/delta(j)
  500    continue
c
c     test for convergence
c        return if the test is satisfied
c
  600    continue
         w = one + phi + psi
         eps1 = eps*dmax*dsqrt(dphi + dpsi)
         if ((dabs(w) .le. eps1) .and. (dabs(lambda - oldlam) .le.
     1                eta*oldlam) ) then
            return
         endif
c
c     return with ifail = 1 if convergence has not ocurred
c     within 45 iterations.
c
         if (niter .lt. 45) go to 650
            ifail = 1
            return
  650    continue
         niter = niter + 1
         oldlam = lambda
 
c
c     calculate the new step
c
         if (i .ne. n) go to 700
c
c           otherwise
c           calculate the step for the special case  i = n
c
            t = (w*psi)/dpsi
            go to 250
  700    continue
         del = delta(ip1)
         temp = psi/dpsi
         d1 = one + phi - del*dphi
         a = (del*(one + phi) + psi*temp)/d1 + temp
         b = (two*temp*del*w)/d1
         t = dsqrt(dabs(a*a - two*b))
         t = b/(a + t)
      go to 250
c
c     last card of evupd
c
      end
      double precision function pythag(a,b)
      double precision a,b
c
c     finds dsqrt(a**2+b**2) without overflow or destructive underflow
c
      double precision p,r,s,t,u
      p = dmax1(dabs(a),dabs(b))
      if (p .eq. 0.0d0) go to 20
      r = (dmin1(dabs(a),dabs(b))/p)**2
   10 continue
         t = 4.0d0 + r
         if (t .eq. 4.0d0) go to 20
         s = r/t
         u = 1.0d0 + 2.0d0*s
         p = u*p
         r = (s/u)**2 * r
      go to 10
   20 pythag = p
      return
      end
      real function rand(foo)
      real foo
      integer init
      data init/1365/
      init = mod(3125*init,65536)
      rand = (init - 32768.)/(2.**15)
      return
      end
      subroutine rotate(icount,indx,jm1,n,ldq,q,z,rhoge0)
c
c     rotate z components to 0
c     assumes norm(z) .eq. 1
c
      logical rhoge0
      integer icount,indx(*),jm1,n,ldq
      double precision q(ldq,*),z(*),gamma,sigma,tau,t1,t2
      integer j,jp1,jj,jjp1,i
c
      if (rhoge0)
     *then
        do 200 j = jm1-icount+1,jm1-1
           jp1 = j + 1
           gamma = z(jp1)     
           sigma = z(j)        
           tau = dsqrt(gamma*gamma + sigma*sigma)
           if (tau .gt. 0.0d0) 
     *     then
              gamma = gamma/tau
              sigma = sigma/tau
              z(jp1) = tau
              z(j) = 0.0
              jj = indx(j)
              jjp1 = indx(jp1)
              do 100 i = 1,n
                 t1 = q(i,jj)
                 t2 = q(i,jjp1)
                 q(i,jj) = t1*gamma - t2*sigma
                 q(i,jjp1) = t1*sigma + t2*gamma
  100         continue
           endif
  200   continue
      else
        do 400 j = jm1-1,jm1-icount+1,-1
           jp1 = j + 1
           gamma = z(j)     
           sigma = z(jp1)        
           tau = dsqrt(gamma*gamma + sigma*sigma)
           if (tau .gt. 0.0d0) 
     *     then
              gamma = gamma/tau
              sigma = sigma/tau
              z(j) = tau
              z(jp1) = 0.0d0
              jj = indx(j)
              jjp1 = indx(jp1)
              do 300 i = 1,n
                 t1 = q(i,jj)
                 t2 = q(i,jjp1)
                 q(i,jjp1) = t2*gamma - t1*sigma
                 q(i,jj) = t2*sigma + t1*gamma
  300         continue
           endif
  400   continue
      endif
c
      return
c
c     last card of rotate
c
      end
      subroutine sesupd(ldq,n,q,d,rho,z,x,dlamda,q2,delta,w,indxp,
     *                                                  indx,info)
c***********************************************************************
c
c     The following program is an experimental piece of software.
c     It should be more efficient on sequential machines than TQL2
c     from EISPACK in finding the eigenvalues and eigenvectors of
c     a symmetric tridiagonal matrix. On parallel machines it
c     should be far superior.
c     If you encounter problems in the use of this routine or
c     have questions on its implementation for your architecture 
c     please contact us.
c
c     This version dated 5/17/85.
c
c     Jack Dongarra (dongarra@anl-mcs)
c     Danny Sorensen (sorensen@anl-mcs)
c     Mathematics and Computer Science Division
c     Argonne National Laboratory
c     Argonne, Illinois 60439
c
c     312-972-7246 or 312-972-8711
c
c
c
c     this subroutine will compute the updated eigensystem of a
c     of a symmetric matrix after modification by a rank one
c     symmetric matrix.
c
c     a = qdq' + rho*z*z'
c
c     it is assumed that the eigenvectors of the original matrix
c     are stored in q, and the eigenvalues are in d.
c     the algorithm consists of three stages...
c
c
c        the first stage constists of deflating the size of
c        the problem when there multiple eigenvalues or if there
c        zero of the vector q'z.  for each such ocurrence the dimension
c        is reduced by one.
c
c        the second stage consists of calculating the updated
c        eigenvalues of the reduced problem.  this requires a call
c        to the zero finding routine evupd.
c
c        the final stage consists of computing the updated eigenvectors
c        directly using the updated eigenvalue.
c
c
c     the algorithm requires o(n**2) operations to update the
c     eigenvectors, but n**3 + o(n**2) to update the eigenvectors.
c
c
c     input variables...
c
c        n      the dimension of the problem.  q is n x n
c
c        q      an  n x n  matrix that contains the eigenvectors of
c               the original matrix on input and the updated
c               eigenvectors on output.
c
c        d      a vector of length n. the original eigenvalues are
c               contained in d on input.  the updated eigenvectors
c               are contained on output.
c
c        rho    a scalar
c
c        z      a vector of length n.  on input this vector
c               containes the updating vector.  the contents of z
c               are destroyed during the updating process.
c
c        x      a working array of length n.
c
c        dlamda a working array of length n
c
c        q2     a working array of dimension  n x n
c
c        delta  a working array of length n
c
c        w      a working array of length n.
c
c        indx   an integer array of length n.
c
c        info   this integer variable indicates failure of the
c               updating process with value -1,  and success
c               with value info .ge. 0. if info .gt. 0 the number
c               of deflations that occurred during the last step
c               is reported.
c
c     called subroutines...
c
c        evupd   a subroutine for calculating the updated eigenvalues.
c        rotate  a subroutine for rotating components of z to zero.
c        enorm   a function for calculating the 2-norm of a vector.
c        epslon  a function for calculating the machine round off level.
c
c
c***********************************************************************
c
c     declare shared variables
c
      integer ldq,n,info,indx(n),indxp(n)
      double precision q(ldq,n),d(n),z(n),x(n),dlamda(n),q2(ldq,n),
     $                 delta(n),w(n)
c
c     declare local variables
c
      logical rhoge0
      integer i,j,nm1,inx,k,im1,k2,icount,jm1,jp,ii,i1,jj,j3,jjpp,ifail
      double precision rho,eps,zero,one,two,s,t,evsprd,dlam,dmax
      double precision enorm,epslon
c
c
c     eps is machine precision
c
      eps = epslon(1.0d0)      
      zero = 0.0d0
      info = 0
      ifail = 0
      one = 1.0d0
      two = 2.0d0
      rhoge0 = (rho .ge. zero)
c
c     compute q(transpose)*z
c
      do 200 i = 1,n
         s = zero
         do 100 j = 1,n
            s = s + q(j,i)*z(j)
            q2(j,i) = q(j,i)
  100    continue
         w(i) = s
  200 continue
c
c     normalize z so that norm(z) = 1
c
      t = enorm(n,w)
      do 300 j = 1,n
         z(j) = w(j)/t
         indx(j) = j
  300 continue
      rho = rho*t*t
      if (rho .eq. zero) return
c
c     order the eigenvalues
c
      nm1 = n - 1
      do 600 j = 1,nm1
         t = d(1)
         s = z(1)
         inx = indx(1)
         k = n - j + 1
         do 500 i = 2,k
            im1 = i - 1
            if (d(i) .lt. t) go to 400
               t = d(i)
               s = z(i)
               inx = indx(i)
               go to 500
  400       continue
            d(im1) = d(i)
            d(i) = t
            z(im1) = z(i)
            z(i) = s
            indx(im1) = indx(i)
            indx(i) = inx
  500    continue
  600 continue
c
c     calculate the spread in eigenvalues
c
         evsprd = dabs(d(1)-d(n))
c
c     if there multiple eigenvalues then the problem deflates.
c     here the number of equal eigenvalues are found.
c     then an elementary reflector is computed to rotate the
c     corresponding eigensubspace so that certain components of
c     z are zero in this new basis.
c
      if (rhoge0) 
     *then
         j = 1
         dlam = d(1)
         k = 0
         k2 = n + 1
         t = z(1)**2
         icount = 1
  700    continue
            jm1 = j 
            j = j + 1
            if (j .gt. n) go to 800
            if(dabs(d(j)-dlam) .le. eps*evsprd) 
     *      then
               t = t + z(j)**2
               icount = icount + 1
               k2 = k2 - 1
               indxp(k2) = jm1
            else
               dmax = dmax1(dabs(d(1)),dabs(d(n)))
               if (dabs(rho)*dsqrt(t) .gt. n*dmax*eps) 
     *         then
                  k = k + 1
                  dlamda(k) = d(jm1)
                  indxp(k) = jm1
                  if (icount .gt. 1) 
     *             call rotate(icount,indx,jm1,n,ldq,q,z,rhoge0)
                  w(k) = t        
               else
                  k2 = k2 - 1
                  indxp(k2) = jm1
               endif
               dlam = d(j)
               icount = 1
               t = z(j)**2
            endif 
            go to 700
  800    continue
         dmax = dmax1(dabs(d(1)),dabs(d(n)))
         if (dabs(rho)*dsqrt(t) .gt. n*dmax*eps) 
     *   then
            k = k + 1
            w(k) = t        
            dlamda(k) = d(jm1)
            indxp(k) = jm1
            if (icount .gt. 1) 
     *       call rotate(icount,indx,jm1,n,ldq,q,z,rhoge0)
         else
            k2 = k2 - 1
            indxp(k2) = jm1
         endif
      else
         jm1 = n
         dlam = d(n)
         k = n + 1
         k2 = 0
         t = z(n)**2
         icount = 1
  701    continue
            j   = jm1
            jm1 = j - 1
            if (jm1 .lt. 1) go to 801
            if(dabs(d(jm1)-dlam) .le. eps*evsprd) 
     *      then
               t = t + z(jm1)**2
               icount = icount + 1
               k2 = k2 + 1
               indxp(n-k2+1) = j
            else
               dmax = dmax1(dabs(d(1)),dabs(d(n)))
               if (dabs(rho)*dsqrt(t) .gt. n*dmax*eps) 
     *         then
                  k = k - 1
                  w(n-k+1) = t        
                  dlamda(n-k+1) = -d(j)
                  indxp(n-k+1) = j
                  jp = jm1 + icount
                  if (icount .gt. 1) 
     *            call rotate(icount,indx,jp,n,ldq,q,z,rhoge0)
               else     
                  k2 = k2 + 1
                  indxp(n-k2+1) = j
               endif
               dlam = d(jm1)
               icount = 1
               t = z(jm1)**2
            endif 
            go to 701
  801       continue
            dmax = dmax1(dabs(d(1)),dabs(d(n)))
            if (dabs(rho)*dsqrt(t) .gt. n*dmax*eps) 
     *      then
               k = k - 1
               w(n-k+1) = t        
               dlamda(n-k+1) = -d(j)
               indxp(n-k+1) = j
               jp = jm1 + icount
               if (icount .gt. 1) 
     *          call rotate(icount,indx,jp,n,ldq,q,z,rhoge0)
            else     
               k2 = k2 + 1
               indxp(n-k2+1) = j
            endif
            k = n - k + 1
            rho = -rho
      endif
c
c
c     compute the updated eigenvalues of the deflated problem
c
c
      j = 0
      do 2700 ii = 1,k
         j = j + 1
         i = ii
         if (.not. rhoge0) i = k - i + 1
         call evupd(k,i,dlamda,w,delta,rho,dlam,ifail)
c
c     if the zero finder failed the computation is terminated
c
         info = -1
         if (ifail .eq. 1) return
         info = 0
c
         if (rhoge0) 
     *   then
           jp = indxp(j)
         else 
           dlam = -dlam
           jp = indxp(k-j+1)
         endif
c
         d(jp) = dlam
c
c     compute the updated eigenvectors
c
            do 2200 i1 = 1,n
               x(i1) = zero
 2200       continue
         do 2500 jj = 1,k
            j3 = jj
            if (.not. rhoge0) j3 = k - jj + 1
               jp = indxp(j3)
               jjpp = indx(jp)
               s = z(jp)/delta(j3)
               do 2300 i1 = 1,n
                  x(i1) = x(i1) + q(i1,jjpp)*s
 2300          continue
 2500    continue
         t =  enorm(n,x)
         j3 = j
         if (.not. rhoge0) j3 = k - j + 1
         jp = indxp(j3)
         do 2600 i1 = 1,n
            q2(i1,jp) = x(i1)/t
 2600    continue
 2700 continue
c
c     store the updated eigenvectors back into q
c
      do 2780 j = k+1,n
         jp = indxp(j)
         jjpp = indx(jp)
         do 2740 i = 1,n
            q2(i,jp) = q(i,jjpp)
 2740    continue
 2780 continue
      do 2900 j = 1,n
         do 2800 i = 1,n
            q(i,j) = q2(i,j)
 2800    continue
 2900 continue
c
c     record the number of deflations in info
c
      info = n-k
c
      return
c
c     last card of sesupd
c
      end
      subroutine tql2(nm,n,d,e,z,ierr)
c
      integer i,j,k,l,m,n,ii,l1,l2,nm,mml,ierr
      double precision d(n),e(n),z(nm,n)
      double precision c,c2,c3,dl1,el1,f,g,h,p,r,s,s2,tst1,tst2,pythag
c
c     this subroutine is a translation of the algol procedure tql2,
c     num. math. 11, 293-306(1968) by bowdler, martin, reinsch, and
c     wilkinson.
c     handbook for auto. comp., vol.ii-linear algebra, 227-240(1971).
c
c     this subroutine finds the eigenvalues and eigenvectors
c     of a symmetric tridiagonal matrix by the ql method.
c     the eigenvectors of a full symmetric matrix can also
c     be found if  tred2  has been used to reduce this
c     full matrix to tridiagonal form.
c
c     on input
c
c        nm must be set to the row dimension of two-dimensional
c          array parameters as declared in the calling program
c          dimension statement.
c
c        n is the order of the matrix.
c
c        d contains the diagonal elements of the input matrix.
c
c        e contains the subdiagonal elements of the input matrix
c          in its last n-1 positions.  e(1) is arbitrary.
c
c        z contains the transformation matrix produced in the
c          reduction by  tred2, if performed.  if the eigenvectors
c          of the tridiagonal matrix are desired, z must contain
c          the identity matrix.
c
c      on output
c
c        d contains the eigenvalues in ascending order.  if an
c          error exit is made, the eigenvalues are correct but
c          unordered for indices 1,2,...,ierr-1.
c
c        e has been destroyed.
c
c        z contains orthonormal eigenvectors of the symmetric
c          tridiagonal (or full) matrix.  if an error exit is made,
c          z contains the eigenvectors associated with the stored
c          eigenvalues.
c
c        ierr is set to
c          zero       for normal return,
c          j          if the j-th eigenvalue has not been
c                     determined after 30 iterations.
c
c     calls pythag for  dsqrt(a*a + b*b) .
c
c     questions and comments should be directed to burton s. garbow,
c     mathematics and computer science div, argonne national laboratory
c
c     this version dated august 1983.
c
c     ------------------------------------------------------------------
c
      ierr = 0
      if (n .eq. 1) go to 1001
c
      do 100 i = 2, n
  100 e(i-1) = e(i)
c
      f = 0.0d0
      tst1 = 0.0d0
      e(n) = 0.0d0
c
      do 240 l = 1, n
         j = 0
         h = dabs(d(l)) + dabs(e(l))
         if (tst1 .lt. h) tst1 = h
c     .......... look for small sub-diagonal element ..........
         do 110 m = l, n
            tst2 = tst1 + dabs(e(m))
            if (tst2 .eq. tst1) go to 120
c     .......... e(n) is always zero, so there is no exit
c                through the bottom of the loop ..........
  110    continue
c
  120    if (m .eq. l) go to 220
  130    if (j .eq. 30) go to 1000
         j = j + 1
c     .......... form shift ..........
         l1 = l + 1
         l2 = l1 + 1
         g = d(l)
         p = (d(l1) - g) / (2.0d0 * e(l))
         r = pythag(p,1.0d0)
         d(l) = e(l) / (p + dsign(r,p))
         d(l1) = e(l) * (p + dsign(r,p))
         dl1 = d(l1)
         h = g - d(l)
         if (l2 .gt. n) go to 145
c
         do 140 i = l2, n
  140    d(i) = d(i) - h
c
  145    f = f + h
c     .......... ql transformation ..........
         p = d(m)
         c = 1.0d0
         c2 = c
         el1 = e(l1)
         s = 0.0d0
         mml = m - l
c     .......... for i=m-1 step -1 until l do -- ..........
         do 200 ii = 1, mml
            c3 = c2
            c2 = c
            s2 = s
            i = m - ii
            g = c * e(i)
            h = c * p
            r = pythag(p,e(i))
            e(i+1) = s * r
            s = e(i) / r
            c = p / r
            p = c * d(i) - s * g
            d(i+1) = h + s * (c * g + s * d(i))
c     .......... form vector ..........
            do 180 k = 1, n
               h = z(k,i+1)
               z(k,i+1) = s * z(k,i) + c * h
               z(k,i) = c * z(k,i) - s * h
  180       continue
c
  200    continue
c
         p = -s * s2 * c3 * el1 * e(l) / dl1
         e(l) = s * p
         d(l) = c * p
         tst2 = tst1 + dabs(e(l))
         if (tst2 .gt. tst1) go to 130
  220    d(l) = d(l) + f
  240 continue
c     .......... order eigenvalues and eigenvectors ..........
      do 300 ii = 2, n
         i = ii - 1
         k = i
         p = d(i)
c
         do 260 j = ii, n
            if (d(j) .ge. p) go to 260
            k = j
            p = d(j)
  260    continue
c
         if (k .eq. i) go to 300
         d(k) = d(i)
         d(i) = p
c
         do 280 j = 1, n
            p = z(j,i)
            z(j,i) = z(j,k)
            z(j,k) = p
  280    continue
c
  300 continue
c
      go to 1001
c     .......... set error -- no convergence to an
c                eigenvalue after 30 iterations ..........
 1000 ierr = l
 1001 return
      end