program pcg3d
      implicit double precision (a-h,o-z)
      common hx(202),hy(202),resd(751),x(281000),
     1  nxp(10),nyp(10),nzp(10),ibc(6),epsp(10),nitp(10),ibcp(6,10)
      common /inde/ li(281000),liw(281000),lin(500),liw2(500)
      common /matrix/ a(-7000:281000,10)
      common /proble/ iprobl
      common /method/ imet,caa
      external dc,zc,cc,fc,mbnds,mbndw,mbndn,mbnde
********************************************************************
*  1 march 1990,  h.a. van der vorst (delft university)
* This program has been designed to demonstrate the possibilities
* for vectorization of ICCG when solving discretized elliptic pde's.
* The implementations of the algorithms are straight-forward and no
* care has been taken to check parameters or to take care of round-off
* errors.
*
* Those who are familiar with the algorithms and vectorization approaches
* described in the book by Dongarra, Duff, Sorenson and Van der Vorst
* (in particular chapter 7) will recognize these elements in the
* Fortran source. Explicit references to chapters of that book are
* made throughout the source text of this program
*
* The present version of the code contains vector and parallel comment-
* directives for the Convex C-2 series. Also the measurement of CPU-time
* and Wall Clock Time is done by Convex timing routines. The adaptation
* to other vector supercomputers is straight-forward.
*
* This program solves 3d partial differential equations, discretised
* by finite differences.
* Boundary conditions are specified as follows:
*   ibc(1)=1  dirichlet on lower y-boundary
*   ibc(1)=2  neumann      ,,,
*   ibc(2)=1  dirichlet on lower x-boundary
*   ibc(2)=2  neumann      ,,,
*   ibc(3)=1  dirichlet on upper y-boundary
*   ibc(3)=2  neumann      ,,,
*   ibc(4)=1  dirichlet on upper x-boundary
*   ibc(4)=2  neumann      ,,,
*   ibc(5)=1  dirichlet u=0 on lower z-boundary
*   ibc(5)=2  neumann du/dn=0 on    ,,,
*   ibc(6)=1  dirichlet u=0 on upper z-boundary
*   ibc(6)=2  neumann du/dn=0 on    ,,,
*
*   nxp, nyp and nzp denote the number of gridpoints
*   in the respective directions.
*   nxp and nyp should be less than 203, the product of
*   nxp, nyp and nzp should not exceed 281000. For larger
*   values the appropriate dimensions should be specified.
*
*   The discretised systems are iteratively solved by
*   the following 10 methods:
*  imet=1   standard iccg
*  imet=2   iccg, parallel in z, 2-term neumann in x,y
*  imet=3   standard miccg (alpha=1.)
*  imet=4   miccg with vectorised 2-term neumann in x,y-plane
*  imet=5   miccg with diagonal-ordering scheme in x,y-plane
*  imet=6   iccg, parallel in z, diag ordering in x,y-plane
*  imet=7   cg with diagonal scaling
*  imet=8   miccg, nested truncation in (x,y)-plane
*  imet=9   eisenstat's implementation of method 10
*  imet=10  miccg vectorized by hyperplane approach (ushiro)
*******************************************************************
      nc=281000
c initialization
      do 2 i=1,281000
 2       x(i)=0.
      do 3 j=1,9
         do 3 i=-7000,281000
 3          a(i,j)=0.
c number of gridpoints (including boundary points) in x, y, and z
      nxp(1)=60
      nyp(1)=60
      nzp(1)=60
      nxp(2)=102
      nyp(2)=102
      nzp(2)=27
      do 1 i=1,2
c epsp(i) specifies the desired error reduction for problem i
         epsp(i)=1.e-6
c nitp(i) specifies the maximum number of iteration steps
         nitp(i)=750
c boundary conditions for problem i
         ibcp(1,i)=1
         ibcp(2,i)=2
         ibcp(3,i)=2
         ibcp(4,i)=2
         ibcp(5,i)=1
         ibcp(6,i)=1
 1    continue
      ibcp(2,1)=1
      ibcp(3,1)=1
      ibcp(4,1)=1
      do 100 ip=1,2
      iprobl=ip
c  specification of x interval:  [a1,b1]
      a1=0.
      b1=1.
c  specification of y interval:  [c1,d1]
      c1=0.
      d1=1.
c  specification of z interval:  [e1,f1]
      e1=0.
      f1=1.
c  computation of the number of unknowns in each direction
      nx=nxp(ip)
      nxx=nx-2
      if (ibcp(2,ip).eq.2) nxx=nxx+1
      if (ibcp(4,ip).eq.2) nxx=nxx+1
      ny=nyp(ip)
      nyy=ny-2
      if (ibcp(1,ip).eq.2) nyy=nyy+1
      if (ibcp(3,ip).eq.2) nyy=nyy+1
      nz=nzp(ip)-2
      if (ibcp(5,ip).eq.2) nz=nz+1
      if (ibcp(6,ip).eq.2) nz=nz+1
      nxny=nx*ny
      nxnynz=nz*nxny
c  specification of the mesh sizes
      do 10 i=1,nx
 10   hx(i)=(b1-a1)/float(nx-1)
      do 20 i=1,ny
 20   hy(i)=(d1-c1)/float(ny-1)
      hzz=(f1-e1)/(nzp(ip)-1.)
      ibc(1)=ibcp(1,ip)
      ibc(2)=ibcp(2,ip)
      ibc(3)=ibcp(3,ip)
      ibc(4)=ibcp(4,ip)
      ibc(5)=ibcp(5,ip)
      ibc(6)=ibcp(6,ip)
      eps=epsp(ip)
      nit=nitp(ip)
      if (ip.eq.1) print 1001
 1001 format(/////'  problem nr 1: '/
     1  '      poisson equation with dirichlet bound. cond.:'/
     2  '      in x and y direction sinus values,'/
     3  '      in z-direction u=0.'/
     4  '      right-hand side: 0. ')
      if (ip.eq.2) print 1002,ip
 1002 format(/////'  problem nr',i2,':'/
     1  '      inner cubic part (.25,.75)x'/
     2  '      (.25,.75)x(.25,.75) with diffusion 100., outer'/
     3  '      region with diffusion 1., linear term=0.'/
     4  '      right-hand side inner: 100, outer: 0'/
     5  '      south dirichlet=1.0, others neumann.'/
     6  '      in z-direction dirichlet: u=0.')
      print 1000,nxx,nyy,nz
 1000 format('  number of unknowns in the x-direction',i4,/
     1  '  number of unknowns in the y-direction',i4,/
     2  '  number of unknowns in the z-direction',i4)
c
c  driver routine for creation of the linear system and its solution
c
      call epsd3d(a1,b1,nx,hx,c1,d1,ny,hy,e1,f1,nz,hzz,nxny,nxnynz,
     1   dc,dc,zc,cc,fc,ibc,mbnds,mbndw,mbndn,mbnde,x,
     2   nit,eps,ierr,resd)
 100  continue
      stop
      end
      subroutine epsd3d(xl,xu,nx,hx,yl,yu,ny,hy,zl,zu,nz,hzz,nxny,n,
     1   dc,ec,zc,cc,fc,ibc,mbnds,mbndw,mbndn,mbnde,
     2   x,nitc,epsc,ier,resd)
      implicit double precision (a-h,o-z)
      dimension resd(1),x(*),ibc(6),hx(1),hy(1)
      common /matrix/ a(-7000:281000,9)
      common /inde/ li(281000),liw1(281000),lin(500),liw2(500)
      common /method/ imet,ca
      real mflops
      external dc,ec,zc,cc,fc,mbnds,mbndw,mbndn,mbnde
      do 1000 ik=1,10
      imet=ik
      nit=nitc
      eps=epsc
      nxc=nx
      nyc=ny
      nzc=nz
      nxnyc=nxny
      nnc=n
c  initialization
      do 3 i=1,281000
 3       x(i)=0.
      do 4 j=1,9
         do 4 i=-7000,281000
 4          a(i,j)=0.
      time=wall(t)
c
c  construction of linear system. The system has a nonzero structure
c  similar to the structure in the 2D case (as in Chapter 7.2.1). The
c  nonzero diagonals of the upper triangular part of A are given by
c  a(i,1) (main diagonal), a(i,2), a(i,3) and a(i,4). The right-hand
c  side is delivered in a(i,5).
c
      call pdedi3(nxc,nyc,nzc,nxnyc,nnc,xl,xu,yl,yu,zl,zu,a(1,1),a(1,2),
     1   a(1,3),a(1,4),a(1,5),hx,hy,hzz,ibc,dc,ec,zc,cc,fc,mbnds,
     2   mbndw,mbndn,mbnde)
      time=wall(t)-time
      if (imet.ge.2) goto 2
      print 10,time
 2    continue
c
c  construction of various incomplete decompositions of A (of the form
c  as in Chapter 7.3. The inverse of the diagonal D is delivered in a(i,6).
c
      time=wall(t)
      if (imet.eq.1) then
         ca=0.
         call minch0(a(1,6),nxc,nxnyc,nnc)
      endif
      if (imet.eq.4.or.imet.eq.5.or.imet.eq.8.or.imet.eq.9
     1             .or.imet.eq.10) then
         ca=.95
         call minch0(a(1,6),nxc,nxnyc,nnc)
      endif
      if (imet.eq.2.or.imet.eq.6)
     1   call inc3p(nzc,nyc,nxc,a(1,1),a(1,2),a(1,3),a(1,4),a(1,6))
      if (imet.eq.3) then
         ca=1.
         call minch0(a(1,6),nxc,nxnyc,nnc)
      endif
      if (imet.eq.7) then
          do 35 i=1,nnc
 35         a(i,6)=1./a(i,1)
      endif
      if (imet.ge.9) call indeks(nxc,nyc,nzc,nnc,m1,m2,li,lin,
     1     liw1,liw2,n0)
      tima=wall(t)-time
      if (imet.eq.1) print 40
 40   format(///,'  standard iccg with recursions ')
      if (imet.eq.2) print 50
 50   format(///'  iccg, parallel in z, neumann in x,y ')
      if (imet.eq.3) print 60
 60   format(///'  standard miccg with recursions  ')
      if (imet.eq.4) print 70,ca
 70   format(///'  miccg(0) with neumann-series, alfa=',f8.4)
      if (imet.eq.5) print 80,ca
 80   format(///'  miccg(0), diagonal-ordered prec alfa=',f8.4)
      if (imet.eq.6) print 90
 90   format(///'  iccg, parallel in z, diag ord. in x,y  ')
      if (imet.eq.7) print 95
 95   format(///'  cg with diagonal scaling ')
      if (imet.eq.8) print 96,ca
 96   format(///'  miccg, nested truncation in x,y-plane, alfa= ',f8.4)
      if (imet.eq.9) print 97
 97   format(///'  miccg(0), hyperplane-ordering+eisenstat trick ')
      if (imet.eq.10) print 98,ca
 98   format(///'  miccg(0) with hyperplanes method, alfa= ',f8.4)
      print 20,tima
      do 99 i=1,nnc
 99      a(i,10)=sqrt(a(i,6))
      time=wall(t)
      tijd=second(t)
c
c  scaling of the system by the diagonal of the inc. dec.
c  this has the effect that the inc. dec. of the scaled system
c  has a unit diagonal. See chapter 7.3.1
c
      do 100 i=1,nnc
      a(i,1)=a(i,1)*a(i,6)
      a(i,6)=sqrt(a(i,6))
 100  a(i,5)=a(i,5)*a(i,6)
      do 200 i=2,nnc
 200  a(i-1,2)=a(i-1,2)*a(i,6)*a(i-1,6)
      nxc1=nxc+1
      do 300 i=nxc1,nnc
 300  a(i-nxc,3)=a(i-nxc,3)*a(i,6)*a(i-nxc,6)
      nxnyc1=nxnyc+1
      do 350 i=nxnyc1,nnc
 350  a(i-nxnyc,4)=a(i-nxnyc,4)*a(i,6)*a(i-nxnyc,6)
      if (imet.eq.9) then
         do 360 i=1,nnc
            a(i,1)=2.-a(i,1)
 360     continue
      endif
      if (imet.eq.10) then
      call iccgu(nxc,nyc,nzc,a(1,5),x,a(1,7),a(1,8),a(1,9),eps,
     1   nit,resd)
      else
      call iccg3d(nnc,a(1,5),x(1),a(1,6),nxc,nyc,nzc,nit,ier,
     1   eps,resd,a(1,7),a(1,8),a(1,9))
      endif
c  back scaling of the solution
      do 400 i=1,nnc
 400  x(i)=x(i)*a(i,10)
      time=wall(t)-time
      tijd=second(t)-tijd
c  nflops gives the number of flops per method
      nflops=((nit+1)*35+14)*nnc
      if (imet.eq.9) nflops=((nit+1)*24+14)*nnc
      if (imet.eq.4.or.imet.eq.2)
     1 nflops=nflops+4*(nit+1)*nnc
      if (imet.eq.8) nflops=nflops+12*(nit+1)*nnc
      if (imet.eq.7) nflops=((nit+1)*23+14)*nnc
      mflops=nflops*1.e-6/time
      print 30,time,mflops,nit
      mflops=nflops*1.e-6/tijd
      print 303,tijd,mflops,nit
      print 510
 510  format('  first 5 residuals: ')
      print 1505,(i-1,resd(i),i=1,5)
      print 520
 520  format('  last 5 residuals: ')
      nitp4=nit-3
      nitp5=nit+1
      print 1505,(i-1,resd(i),i=nitp4,nitp5)
 1505 format(5('  ',i4,3x,e10.3))
 999  continue
 1000 continue
 10   format(/'   time for discretisation    ',f8.3)
 20   format('   time for inc. dec.       ',f8.3)
 30   format(/'  wct time for iccg/inc.dec.   ',f10.3,
     1       '  mflops=',f8.3,'  nit= ',i5)
 303  format('  cpu time for iccg//inc.dec.  ',f10.3,
     1       '  mflops=',f8.3,'  nit= ',i5)
      return
      end
      subroutine iccg3d(n,b,x,diag,nx,ny,nz,nit,ierr,eps,res,
     1    r,p,ap)
      implicit double precision (a-h,o-z)
      dimension b(1),x(1),r(1),p(1),ap(1),res(1),
     1  diag(1)
      common /matrix/ a(-7000:281000,9)
      common /method/ imet
c  this routine does a number of iccg iteration steps
      nxny=nx*ny
      epe=eps*eps
      if (imet.eq.9) call eislow(b,n,nx,ny,nz)
      do 10 i=1,n
      x(i)=0.
 10   r(i)=b(i)
      kk=0
      if (imet.eq.1.or.imet.eq.3)
     1   call prec3s(r,p,n,nxny,nx)
      if (imet.eq.2.or.imet.eq.6)
     1   call prec3p(r,p,n,nx,ny,nz,a(1,1),a(1,2),a(1,3),a(1,4))
      if (imet.eq.4.or.imet.eq.5.or.imet.eq.8)
     1  call prec3d(r,p,n,nx,ny,nz)
      if (imet.eq.7.or.imet.eq.9) then
         do 15 i=1,n
 15         p(i)=r(i)
      endif
      rkr=0.
      do 16 i=1,n
 16      rkr=rkr+r(i)*p(i)
      if (rkr.gt.0.) goto 20
      ierr=2
      return
 20   rrz=rkr
      res(1)=sqrt(rkr)
      nn=nit
      do 130 kk=1,nn
c  for Eisenstat's implementation, see Chapter 7.3.1
      if (imet.eq.9) then
         call eisup(p(1),diag(1),n,nx,ny,nz)
         do 85 i=1,n
 85      ap(i)=p(i)-a(i,1)*diag(i)
         call eislow(ap(1),n,nx,ny,nz)
         do 86 i=1,n
 86      ap(i)=ap(i)+diag(i)
         pap=ddot(n,p,1,ap,1)
      else
         call a3x(p,ap,n,nx,ny)
         pap=ddot(n,p,1,ap,1)
      endif
      if (pap.gt.0.) goto 90
      ierr=1
      return
 90   alfa=rkr/pap
      if (imet.eq.9) then
         do 95 i=1,n
            x(i)=x(i)+alfa*diag(i)
 95         r(i)=r(i)-alfa*ap(i)
      else
         call upd(n,alfa,x,p,r,ap)
      endif
      if (imet.eq.1.or.imet.eq.3)
     1 call prec3s(r,ap,n,nxny,nx)
      if (imet.eq.2.or.imet.eq.6)
     1 call prec3p(r,ap,n,nx,ny,nz,a(1,1),a(1,2),a(1,3),a(1,4))
      if (imet.eq.4.or.imet.eq.5.or.imet.eq.8)
     1  call prec3d(r,ap,n,nx,ny,nz)
      if (imet.eq.7.or.imet.eq.9) then
         rkrn=0.
         do 96 i=1,n
 96         rkrn=rkrn+r(i)*r(i)
      else
         rkrn=0.
         do 97 i=1,n
 97         rkrn=rkrn+r(i)*ap(i)
      endif
      res(kk+1)=sqrt(rkrn)
      if (rkrn.lt.epe*rrz) nit=kk
      if (kk.eq.nit) return
      if (rkrn.gt.0.) goto 110
      ierr=2
      return
 110  beta=rkrn/rkr
      rkr=rkrn
      if (imet.eq.7.or.imet.eq.9) then
         do 115 i=1,n
 115        p(i)=r(i)+beta*p(i)
      else
         do 120 i=1,n
 120        p(i)=ap(i)+beta*p(i)
      endif
 130  continue
      return
      end
      subroutine upd(n,alfa,x,p,r,ap)
      implicit double precision (a-h,o-z)
      dimension x(*),p(*),r(*),ap(*)
c parallel updating of x and r in iccg process
c$dir begin_tasks
      call daxpy(n,alfa,p,1,x,1)
c$dir next_task
      call daxpy(n,-alfa,ap,1,r,1)
c$dir end_tasks
      return
      end
      subroutine a3x(x, b, n, nx, ny)
      implicit double precision (a-h,o-z)
      dimension x(*), b(*)
      common /matrix/ a(-7000:281000,9)
*
*   matrix multiplication for 7-point difference scheme.
*   for 3D problems
*
      nxny=nx*ny
      b(1)=a(1,1)*x(1)+a(1,2)*x(2)+a(1,3)*x(nx+1)+
     1      a(1,4)*x(1+nxny)
      do 10 i=2,nx
 10      b(i)=a(i-1,2)*x(i-1)+a(i,1)*x(i)+a(i,2)*x(i+1)+
     1         a(i,3)*x(i+nx)+a(i,4)*x(i+nxny)
      do 20 i=nx+1,nxny
 20      b(i)=a(i-nx,3)*x(i-nx)+a(i-1,2)*x(i-1)+a(i,1)*x(i)+
     1      a(i,2)*x(i+1)+a(i,3)*x(i+nx)+a(i,4)*x(i+nxny)
      do 30 i=nxny+1,n-nxny
         b(i)=a(i-nxny,4)*x(i-nxny)+a(i-nx,3)*x(i-nx)+
     1        a(i-1,2)*x(i-1)+a(i,1)*x(i)+a(i,2)*x(i+1)+
     2        a(i,3)*x(i+nx)+a(i,4)*x(i+nxny)
 30   continue
      do 40 i=n-nxny+1,n-nx
 40      b(i)=a(i-nxny,4)*x(i-nxny)+a(i-nx,3)*x(i-nx)+a(i-1,2)*x(i-1)
     1   +a(i,1)*x(i)+a(i,2)*x(i+1)+a(i,3)*x(i+nx)
      do 50 i=n-nx+1,n-1
 50      b(i)=a(i-nxny,4)*x(i-nxny)+a(i-nx,3)*x(i-nx)+a(i-1,2)*x(i-1)
     1      +a(i,1)*x(i)+a(i,2)*x(i+1)
      b(n)=a(n-nxny,4)*x(n-nxny)+a(n-nx,3)*x(n-nx)+a(n-1,2)*x(n-1)+
     1      a(n,1)*x(n)
      return
      end
      subroutine minch0(d,nx,nxny,n)
      implicit double precision (a-h,o-z)
      dimension d(n)
      common /matrix/ a(-7000:281000,9)
      common /method/ imet,t
*  modified iccg(0), 3d-version with relaxation parameter t,
*  in d the elements of the inverse of D are delivered (Chapter 7.3)
      d(1)=1./a(1,1)
      do 10 i=2,nx
 10   d(i)=1./(a(i,1)-a(i-1,2)*(a(i-1,2)+t*a(i-1,3)+t*a(i-1,4))*d(i-1))
      nx1=nx+1
      do 20 i=nx1,nxny
      inx=i-nx
 20   d(i)=1./(a(i,1)-a(i-1,2)*(a(i-1,2)+t*a(i-1,3)+t*a(i-1,4))*d(i-1)
     1        -a(inx,3)*(t*a(inx,4)+a(inx,3)+t*a(inx,2))*d(inx))
      nxny1=nxny+1
      do 30 i=nxny1,n
      inx=i-nx
      imx=i-nxny
 30   d(i)=1./(a(i,1)-a(i-1,2)*(a(i-1,2)+t*a(i-1,3)+t*a(i-1,4))*d(i-1)
     1        -a(inx,3)*(t*a(inx,2)+a(inx,3)+t*a(inx,4))*d(inx)
     2        -a(imx,4)*(t*a(imx,2)+t*a(imx,3)+a(imx,4))*d(imx))
      return
      end
      subroutine inc3p(nz,ny,nx,a,b,c,d,di)
      implicit double precision (a-h,o-z)
      dimension a(*),b(*),c(*),d(*),di(*)
*  incomplete decomposition for 3-dim case
*  nz planes, ny lines, nx points.
*  parallel splitting in plane direction assumed
*  inverse diag of inc. parallel dec. is delivered in di(*)
*  see Chapter 7.3.6
      nz2=nz/2
      nxny=nx*ny
      do 10 j=1,nxny
 10   di(j)=a(j)
      call inc2ps(ny,nx,di,b,c,di,0)
      iprev=(nz-1)*nxny
      il=iprev+1
      iu=iprev+nxny
      do 20 j=il,iu
 20   di(j)=a(j)
      call inc2ps(ny,nx,di,b,c,di,iprev)
      do 40 k=2,nz2
         iprev=(k-1)*nxny
         il=iprev+1
         iu=iprev+nxny
c$dir no_recurrence
         do 30 j=il,iu
 30         di(j)=a(j)-d(j-nxny)**2*di(j-nxny)
         call inc2ps(ny,nx,di,b,c,di,iprev)
 40   continue
      nz22=nz2+2
      do 60 k=nz-1,nz22,-1
         iprev=(k-1)*nxny
         il=iprev+1
         iu=iprev+nxny
c$dir no_recurrence
         do 50 j=il,iu
 50         di(j)=a(j)-d(j)**2*di(j+nxny)
         call inc2ps(ny,nx,di,b,c,di,iprev)
 60   continue
      iprev=nz2*nxny
      il=iprev+1
      iu=iprev+nxny
c$dir no_recurrence
      do 70 j=il,iu
 70      di(j)=a(j)-d(j-nxny)**2*di(j-nxny)-d(j)**2*di(j+nxny)
      call inc2ps(ny,nx,di,b,c,di,iprev)
      return
      end
      subroutine inc2ps(ny,nx,a,b,c,di,iprev)
      implicit double precision (a-h,o-z)
      dimension a(*),b(*),c(*),di(*)
      j=iprev+1
      di(j)=1./a(j)
      do 10 j=iprev+2,iprev+nx
 10     di(j)=1./(a(j)-b(j-1)**2*di(j-1))
      m1=iprev+nx+1
      m2=iprev+nx*ny
      do 20 j=m1,m2
 20     di(j)=1./(a(j)-b(j-1)**2*di(j-1)-c(j-nx)**2*di(j-nx))
      return
      end
      subroutine prec3s(x,px,n,nxny,nx)
      implicit double precision (a-h,o-z)
      dimension x(n),px(n)
      common /matrix/ a(-7000:281000,9)
*   standard preconditioning iccg(0)  .
      px(1) = x(1)
      do 10 i=2,nx
         px(i) = x(i)-a(i-1,2)*px(i-1)
   10 continue
      nx1 = nx + 1
      do 20 i=nx1,nxny
         px(i)=x(i)-a(i-1,2)*px(i-1)-a(i-nx,3)*px(i-nx)
   20 continue
      nxny1=nxny+1
      do 25 i=nxny1,n
         px(i)=x(i)-a(i-1,2)*px(i-1)-a(i-nx,3)*px(i-nx)-
     1          a(i-nxny,4)*px(i-nxny)
 25   continue
*
*  lower triangular part has been solved
*
      ij = n - 1
      do 30 i=2,nx
         px(ij) = px(ij) - a(ij,2)*px(ij+1)
         ij = ij - 1
   30 continue
      do 40 i=nx1,nxny
         px(ij)=px(ij)-(a(ij,2)*px(ij+1)+a(ij,3)*px(ij+nx))
         ij = ij - 1
   40 continue
      do 50 i=nxny1,n
         px(ij)=px(ij)-(a(ij,2)*px(ij+1)+a(ij,3)*px(ij+nx)+
     1           a(ij,4)*px(ij+nxny))
         ij=ij-1
 50   continue
*
*  diagonal and uppertriangular part has been solved
*
      return
      end
      subroutine prec3d(x,px,n,nx,ny,nz)
      implicit double precision (a-h,o-z)
      dimension x(n),px(n)
      common /matrix/ a(-7000:281000,9)
      logical left
      common /method/ imet
c  preconditioning for iccg
c  imet.eq.4: truncated neumann series in (x,y)-plane (Chapter 7.3.5)
c  imet.eq.5: vectorization by diagonalwise ordering of computations
c             in (x,y) plane (Chapter 7.3.4)
c  imet.eq.8: nested truncated neumann series (Chapter 7.3.5)
      nxny=nx*ny
      left=.true.
      do 10 i=1,nxny
 10      px(i)=x(i)
      if (imet.eq.4)
     1   call precv(px(1),nxny,a(1,2),a(1,3),nx,left)
      if (imet.eq.5)
     1   call precd(px(1),nxny,a(1,2),a(1,3),nx,left)
         if (imet.eq.8)
     1      call precvv(px(1),nxny,a(1,2),a(1,3),nx,left)
      do 30 j=2,nz
         jl=(j-1)*nxny+1
         ju=j*nxny
c$dir no_recurrence
         do 20 i=jl,ju
 20         px(i)=x(i)-a(i-nxny,4)*px(i-nxny)
         if (imet.eq.4)
     1      call precv(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
         if (imet.eq.5)
     1      call precd(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
         if (imet.eq.8)
     1      call precvv(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
 30   continue
      left=.false.
      jl=(nz-1)*nxny+1
      if (imet.eq.4)
     1   call precv(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
      if (imet.eq.5)
     1   call precd(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
         if (imet.eq.8)
     1      call precvv(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
      do 50 j=nz-1,1,-1
         jl=(j-1)*nxny+1
         ju=j*nxny
c$dir no_recurrence
         do 40 i=jl,ju
 40         px(i)=px(i)-a(i,4)*px(i+nxny)
         if (imet.eq.4)
     1      call precv(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
         if (imet.eq.5)
     1      call precd(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
         if (imet.eq.8)
     1      call precvv(px(jl),nxny,a(jl,2),a(jl,3),nx,left)
 50   continue
      return
      end
      subroutine precd(px,nxny,a2,a3,nx,left)
      implicit double precision (a-h,o-z)
      dimension px(*),a2(*),a3(*)
      logical left
* diagonalwise computation in preconditioning over x,y-plane
* (Chapter 7.3.4)
      ny=nxny/nx
      if (.not.left) goto 15
      do 10 ii=2,nx+ny-1
         jjmin=max(1,ii-nx+1)
         jjmax=min(ii,ny)
         is=(jjmin-1)*(nx-1)+ii
c$dir no_recurrence
         do 5 j=jjmin,jjmax
         px(is)=px(is)-a2(is-1)*px(is-1)-
     1                     a3(is-nx)*px(is-nx)
 5       is=is+nx-1
10    continue
      return
15    continue
      do 30 ii=nx+ny-1,2,-1
         jjmin=max(1,ii-nx+1)
         jjmax=min(ii,ny)
         is=(jjmin-1)*(nx-1)+ii
c$dir no_recurrence
         do 20 j=jjmin,jjmax
         px(is)=px(is)-a2(is)*px(is+1)-
     1                     a3(is)*px(is+nx)
 20   is=is+nx-1
 30   continue
      px(1)=px(1)-a2(1)*px(2)-a3(1)*px(nx+1)
      return
      end
      subroutine precv(px,nxny,a2,a3,nx,left)
      implicit double precision (a-h,o-z)
      dimension px(nxny),a2(nxny),a3(nxny)
      logical left
* two term truncated Neumann series preconditioner (Chapter 7.3.5)
      ny=nxny/nx
      if (.not.left) goto 33
      px(nx+1)=px(nx+1)-a3(1)*px(1)
      px(2)=px(2)-a2(1)*px(1)
      px(nx+2)=px(nx+2)-a3(2)*px(2)
c$dir no_recurrence
      do 10 i=nx,3,-1
         px(i)=px(i)-a2(i-1)*(px(i-1)-a2(i-2)*px(i-2))
 10      px(nx+i)=px(nx+i)-a3(i)*px(i)
      il=1
      iu=nx
      do 30 j=3,ny
         il=il+nx
         iu=iu+nx
         il1=il+1
c$dir no_recurrence
         do 25 i=iu,il1,-1
            px(i)=px(i)-a2(i-1)*(px(i-1)-a2(i-2)*px(i-2))
 25         px(i+nx)=px(i+nx)-a3(i)*px(i)
         px(nx+il)=px(nx+il)-a3(il)*px(il)
*  assuming a2(il-1)=0.
 30   continue
      il1=il+nx+1
c$dir no_recurrence
      do 31 i=nxny,il1,-1
 31      px(i)=px(i)-a2(i-1)*(px(i-1)-a2(i-2)*px(i-2))
      return
 33   continue
      il=nxny-nx+1
      iu=nxny
      iu2=iu-2
c$dir no_recurrence
      do 35 i=il,iu2
         px(i)=px(i)-a2(i)*(px(i+1)-a2(i+1)*px(i+2))
 35      px(i-nx)=px(i-nx)-a3(i-nx)*px(i)
      px(iu-1)=px(iu-1)-a2(iu-1)*px(iu)
      px(iu-1-nx)=px(iu-1-nx)-a3(iu-nx-1)*px(iu-1)
      px(iu-nx)=px(iu-nx)-a3(iu-nx)*px(iu)
      do 50 j=3,ny
         il=il-nx
         iu=iu-nx
         iu1=iu-1
c$dir no_recurrence
         do 45 i=il,iu1
            px(i)=px(i)-a2(i)*(px(i+1)-a2(i+1)*px(i+2))
 45         px(i-nx)=px(i-nx)-a3(i-nx)*px(i)
         px(iu-nx)=px(iu-nx)-a3(iu-nx)*px(iu)
*  assuming a2(iu)=0.
 50   continue
      iu=nx-1
c$dir no_recurrence
      do 51 i=1,iu
 51      px(i)=px(i)-a2(i)*(px(i+1)-a2(i+1)*px(i+2))
      return
      end
      subroutine precvv(px,nxny,a2,a3,nx,left)
      implicit double precision (a-h,o-z)
      dimension px(nxny),a2(nxny),a3(nxny)
      logical left
*   nested truncated Neumann series (Chapter 7.3.5)
      if (.not.left) goto 30
c$dir no_recurrence
      do 10 j=nxny,nx*2+1,-1
         px(j)=px(j)-a3(j-nx)*(px(j-nx)-a2(j-nx-1)*px(j-nx-1)-
     1      a3(j-2*nx)*px(j-2*nx))
 10   continue
c$dir no_recurrence
      do 11 j=2*nx,nx+2,-1
 11      px(j)=px(j)-a3(j-nx)*(px(j-nx)-a2(j-nx-1)*px(j-nx-1))
      j=nx+1
      px(j)=px(j)-a3(j-nx)*px(j-nx)
      do 20 j=nxny,3,-1
 20      px(j)=px(j)-a2(j-1)*(px(j-1)-a2(j-2)*px(j-2))
      px(2)=px(2)-a2(1)*px(1)
      return
 30   continue
c$dir no_recurrence
      do 40 j=1,nxny-2*nx
         px(j)=px(j)-a3(j)*(px(j+nx)-a2(j+nx)*px(j+nx+1)-
     1   a3(j+nx)*px(j+2*nx))
 40   continue
c$dir no_recurrence
      do 41 j=nxny-2*nx+1,nxny-nx-1
 41      px(j)=px(j)-a3(j)*(px(j+nx)-a2(j+nx)*px(j+nx+1))
      j=nxny-nx
      px(j)=px(j)-a3(j)*px(j+nx)
      do 50 j=1,nxny-2
 50      px(j)=px(j)-a2(j)*(px(j+1)-a2(j+1)*px(j+2))
      j=nxny-1
      px(j)=px(j)-a2(j)*px(j+1)
      return
      end
      subroutine eislow(px,n,nx,ny,nz)
      implicit double precision (a-h,o-z)
      dimension px(*)
      common /matrix/ a(-7000:281000,9)
      common /inde/ l(281000),lw1(281000),ln(500),lw2(500)
c  lower triangular part of preconditioner for Hyperplane ordering
c  Chapter 7.3.4 (indirect addressing version)
      n0=nx+ny+nz-2
      m1=nx
      m2=nx*ny
      do 100 j=2,n0
         is=ln(j)+1
         ie=ln(j+1)
c$dir no_recurrence
         do 50 k=is,ie
            i=l(k)
            px(i)=px(i)-a(i-1,2)*px(i-1)-a(i-m1,3)*px(i-m1)
     1         -a(i-m2,4)*px(i-m2)
 50      continue
 100  continue
      return
      end
      subroutine eisup(x,px,n,nx,ny,nz)
      implicit double precision (a-h,o-z)
      dimension x(*),px(*)
      common /matrix/ a(-7000:281000,9)
      common /inde/ l(281000),lw1(281000),ln(500),lw2(500)
c  upper triangular part of preconditioner for Hyperplane ordering
c  Chapter 7.3.4 (indirect addressing version)
      n0=nx+ny+nz-2
      m1=nx
      m2=nx*ny
      do 200 j=n0,1,-1
         is=ln(j)+1
         ie=ln(j+1)
c$dir no_recurrence
         do 150 k=is,ie
            i=l(k)
 150        px(i)=x(i)-a(i,2)*px(i+1)-a(i,3)*px(i+m1)
     1         -a(i,4)*px(i+m2)
 200  continue
      return
      end
      subroutine pre3pl(nz,ny,nx,x,px,b,c,d)
      implicit double precision (a-h,o-z)
      dimension x(*),px(*),b(*),c(*),d(*)
      common /method/ imet
*  pre3pl solves for the left factor of a 3-d parallel splitting
*  diagonal scaling of prec to 1.0 has been assumed.
c  chapter 7.3.6: twisted in z-direction, and either Neumann series
c  (imet.eq.2) or diagonalwise computation (imet.eq.6) in (x,y) direction
      nz2=nz/2
      nxny=nx*ny
c$dir begin_tasks
      il=1
      iu=nxny
      do 10 j=il,iu
 10      px(j)=x(j)
      if (imet.eq.2) call precv(px,nxny,b,c,nx,.true.)
      if (imet.eq.6) call precd(px,nxny,b,c,nx,.true.)
      do 40 k=2,nz2
         ip=(k-1)*nxny
         il=ip+1
         iu=ip+nxny
c$dir no_recurrence
         do 30 j=il,iu
 30         px(j)=x(j)-d(j-nxny)*px(j-nxny)
          if (imet.eq.2)
     1       call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
          if (imet.eq.6)
     1       call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
 40   continue
c$dir next_task
      ip=(nz-1)*nxny
      il=ip+1
      iu=ip+nxny
      do 20 j=il,iu
 20      px(j)=x(j)
      if (imet.eq.2)
     1   call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
      if (imet.eq.6)
     1   call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
      nz22=nz2+2
      do 60 k=nz-1,nz22,-1
         ip=(k-1)*nxny
         il=ip+1
         iu=ip+nxny
c$dir no_recurrence
         do 50 j=il,iu
 50         px(j)=x(j)-d(j)*px(j+nxny)
         if (imet.eq.2)
     1      call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
         if (imet.eq.6)
     1      call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
 60   continue
c$dir end_tasks
      ip=nz2*nxny
      il=ip+1
      iu=ip+nxny
c$dir no_recurrence
      do 70 j=il,iu
 70      px(j)=x(j)-d(j)*px(j+nxny)-d(j-nxny)*px(j-nxny)
      if (imet.eq.2)
     1   call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
      if (imet.eq.6)
     1   call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.true.)
      return
      end
      subroutine pre3pr(nz,ny,nx,px,b,c,d)
      implicit double precision (a-h,o-z)
      dimension px(*),b(*),c(*),d(*)
      common /method/ imet
*  pre3pr solves for the right factor of a 3-d parallel splitting,
*  diagonal of splitting is assumed to be scaled to 1.0
*  chapter 7.3.6: twisted in z-direction, and either Neumann series
*  (imet.eq.2) or diagonalwise computation (imet.eq.6) in (x,y) direction
      nxny=nx*ny
      nz2=nz/2
      ip=nz2*nxny
      if (imet.eq.2)
     1   call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.false.)
      if (imet.eq.6)
     1   call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.false.)
c$dir begin_tasks
      do 20 k=nz2,1,-1
         ip=(k-1)*nxny
         il=ip+1
         iu=ip+nxny
c$dir no_recurrence
         do 10 j=il,iu
 10         px(j)=px(j)-d(j)*px(j+nxny)
         if (imet.eq.2)
     1      call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.false.)
         if (imet.eq.6)
     1      call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.false.)
 20   continue
c$dir next_task
      do 40 k=nz2+2,nz
         ip=(k-1)*nxny
         il=ip+1
         iu=ip+nxny
c$dir no_recurrence
         do 30 j=il,iu
 30         px(j)=px(j)-d(j-nxny)*px(j-nxny)
         if (imet.eq.2)
     1      call precv(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.false.)
         if (imet.eq.6)
     1      call precd(px(ip+1),nxny,b(ip+1),c(ip+1),nx,.false.)
 40   continue
c$dir end_tasks
      return
      end
      subroutine prec3p(x,px,n,nx,ny,nz,a,b,c,d)
      implicit double precision (a-h,o-z)
      dimension x(*),px(*),a(*),b(*),c(*),d(*)
*  preconditioning for 3-d case, parallel splitting in z-direction
      call pre3pl(nz,ny,nx,x,px,b,c,d)
      call pre3pr(nz,ny,nx,px,b,c,d)
      return
      end
      subroutine pdedi3(nx,ny,nz,nxny,n,xl,xu,yl,yu,zl,zu,diag,diagx,
     1     diagy,diagz,rhs,hx,hy,hzz,ibc,dc,ec,zc,cc,fc,mbnds,mbndw,
     2     mbndn,mbnde)
      implicit double precision (a-h,o-z)
      dimension diag(*),diagx(*),diagy(*),diagz(*),rhs(*),ibc(6),
     1     hx(*),hy(*)
      external dc,ec,cc,fc,mbnds,mbndw,mbndn,mbnde
*  discretisation of 3d pde.
*  mesh-sizes are assumed to be uniform
*  zc(  ) represents the coefficient of the second
*  derivative in z.
      ip=1
      n=nx*ny*nz
      do 10 i=1,n
         diag(i)=0.
         diagx(i)=0.
         diagy(i)=0.
         diagz(i)=0.
         rhs(i)=0.
 10   continue
      z=zl
      if (ibc(5).eq.1) z=zl+hzz
      do 1000 iz=1,nz
         nxc=nx
         nyc=ny
         nxnyc=nxny
         call pdees3(nxc,nyc,nxnyc,xl,xu,yl,yu,hx,hy,mbnds,mbndw,
     1        mbndn,mbnde,ibc,dc,ec,cc,fc,diag(ip),diagx(ip),diagy(ip),
     2        rhs(ip),z)
         call compr(nxc,nyc,nxnyc,ibc,diag(ip),diagx(ip),diagy(ip),
     1        rhs(ip))
         ij=ip
         zzl=z-hzz/2.
         zzu=z+hzz/2.
         yy=yl
         if (ibc(1).eq.1) yy=yl+hy(1)
         do 100 iy=1,nyc
            dy=hy(1)
            if (ibc(1).eq.2.and.iy.eq.1) dy=dy/2.
            if (ibc(3).eq.2.and.iy.eq.nyc) dy=dy/2.
            xx=xl
            if (ibc(2).eq.1) xx=xl+hx(1)
            do 20 ix=1,nxc
            if (ibc(5).eq.2.and.iz.eq.1) then
            d1=0.
            else
               d1=zc(xx,yy,zzl)
            endif
            if (ibc(6).eq.2.and.iz.eq.nz) then
            d2=0.
            else
               d2=zc(xx,yy,zzu)
            endif
               dx=hx(1)
               if (ibc(2).eq.2.and.ix.eq.1) dx=hx(1)/2.
               if (ibc(4).eq.2.and.ix.eq.nxc) dx=hx(1)/2.
               opp=dx*dy/(hzz*hzz)
               diag (ij)=diag(ij)+opp*(d1+d2)
               if (iz.ne.nz) diagz(ij)=-opp*d2
               ij=ij+1
               xx=xx+hx(1)
 20         continue
            yy=yy+hy(1)
 100     continue
         ip=ip+nxc*nyc
         z=z+hzz
 1000 continue
      nx=nxc
      ny=nyc
      nxny=nxnyc
      n=nx*ny*nz
      return
      end
      subroutine mbnds(alp,bet,gam,xij,z)
      implicit double precision (a-h,o-z)
      common /proble/ iprobl
*  boundary y=ylower (c1)
*  dirichlet u=sin(xij)
      if (iprobl.eq.1) then
         alp=1.
         bet=0.
         gam=sin(xij)
      endif
*  dirichlet u=1
      if (iprobl.eq.2) then
         alp=1.
         bet=0.
         gam=1.
      endif
      return
      end
      subroutine mbndw(alp,bet,gam,yij,z)
      implicit double precision (a-h,o-z)
      common /proble/ iprobl
*  boundary x=xlower (a1)
*  dirichlet u=sin(yij)
      if (iprobl.eq.1) then
         alp=1.
         bet=0.
         gam=sin(yij)
      endif
*  neumann du/dn=0.
      if (iprobl.eq.2) then
         alp=0.
         bet=1.
         gam=0.
      endif
      return
      end
      subroutine mbndn(alp,bet,gam,xij,z)
      implicit double precision (a-h,o-z)
      common /proble/ iprobl
*  boundary y=yupper (d1)
*  dirichlet u=sin(xij)
      if (iprobl.eq.1) then
         alp=1.
         bet=0.
         gam=sin(xij)
      endif
*  neumann du/dn=0.
      if (iprobl.eq.2) then
         alp=0.
         bet=1.
         gam=0.
      endif
      return
      end
      subroutine mbnde(alp,bet,gam,yij,z)
      implicit double precision (a-h,o-z)
      common /proble/ iprobl
*  boundary x=xupper (b1)
*  dirichlet u=sin(yij)
      if (iprobl.eq.1) then
         alp=1.
         bet=0.
         gam=sin(yij)
      endif
*  neumann du/dn=0.
      if (iprobl.eq.2) then
         alp=0.
         bet=1.
         gam=0.
      endif
      return
      end
      function dc(x,y,z)
      implicit double precision (a-h,o-z)
      common /proble/ iprobl
      if (iprobl.eq.1) dc=1.
      if (iprobl.eq.2) then
         dc=1.
         if(x.ge..25.and.x.le..75.and.y.ge..25.and.y.le..75.and.
     1      z.ge..25.and.z.le..75) dc=100.
      endif
      return
      end
      function zc(x,y,z)
      implicit double precision (a-h,o-z)
      zc=dc(x,y,z)
      return
      end
      function cc(x,y,z)
      implicit double precision (a-h,o-z)
      cc=0.
      return
      end
      function fc(x,y,z)
      implicit double precision (a-h,o-z)
      common/proble/ iprobl
      if (iprobl.eq.1) fc=0.
      if (iprobl.eq.2) then
         fc=0.
         if(x.ge..25.and.x.le..75.and.y.ge..25.and.y.le..75.and.
     1      z.ge..25.and.z.le..75) fc=100.
      endif
      return
      end
      subroutine pdees3(nx, ny, nxny, a, b, c, d, hx, hy, mbnds, mbndw,
     1 mbndn, mbnde, bc, dc, ec, cc, fc, diag, sdiag, bsdiag, rhs,
     2 z)
      implicit double precision (a-h,o-z)
      dimension hx(1), hy(1), bc(4), diag(1), sdiag(1), bsdiag(1),
     1 rhs(1)
      integer bc
      yij = c
      ij = 0
      do 120 j=1,ny
         h1 = 1.0
         h3 = 1.0
         if (j.ne.1) h1 = hy(j-1)
         if (j.ne.ny) h3 = hy(j)
         xij = a
         do 110 i=1,nx
            ij = ij + 1
            if (i.ne.1) go to 50
            xij = a
            d1 = 0.
            d2 = 0.
            e1 = 0.
            e2 = 0.
            f1 = 0.
            f2 = 0.
            coef1 = 0.
            coef2 = 0.
            h2 = 1.
   50       if (j.ne.1) go to 60
            yij = c
            d1 = 0.
            d4 = 0.
            e1 = 0.
            e4 = 0.
            f1 = 0.
            f4 = 0.
            coef1 = 0.
            coef4 = 0.
            h1 = 1.
   60       if (i.ne.nx) go to 70
            e3 = 0.
            e4 = 0.
            d3 = 0.
            d4 = 0.
            f3 = 0.
            f4 = 0.
            coef3 = 0.
            coef4 = 0.
            h4 = 1.
   70       if (j.ne.ny) go to 80
            e2 = 0.
            e3 = 0.
            d2 = 0.
            d3 = 0.
            f2 = 0.
            f3 = 0.
            coef2 = 0.
            coef3 = 0.
            h3 = 1.
   80       continue
            if (.not.(i.ne.nx .and. j.ne.ny)) go to 90
            h4 = hx(i)
            e3 = ec(xij+h4/2.,yij+h3/2.,z)
            d3 = dc(xij+h4/2.,yij+h3/2.,z)
            f3 = fc(xij+h4/2.,yij+h3/2.,z)
            coef3 = cc(xij+h4/2.,yij+h3/2.,z)
            diag(ij+nx) = e3
            sdiag(ij+nx) = d3
            bsdiag(ij+nx) = f3
            rhs(ij+nx) = coef3
   90       if (.not.(i.ne.nx .and. j.ne.1)) go to 100
            h4 = hx(i)
            e4 = diag(ij)
            d4 = sdiag(ij)
            f4 = bsdiag(ij)
            coef4 = rhs(ij)
  100       continue
            diag(ij) = ((h4*e4+h2*e1)/h1+(h2*e2+h4*e3)/h3+(h1*d1+h3*d2)/
     1       h2+(h3*d3+h1*d4)/h4)*0.5 + (coef1*h1*h2+coef2*h2*h3+coef3*
     2       h3*h4+coef4*h4*h1)*0.25
            sdiag(ij) = -(h3*d3+h1*d4)*0.5/h4
            bsdiag(ij) = -(h2*e2+h4*e3)*0.5/h3
            rhs(ij) = (f1*h1*h2+f2*h2*h3+f3*h3*h4+f4*h4*h1)*0.25
            xij = xij + h4
            h2 = h4
            e2 = e3
            d2 = d3
            f2 = f3
            coef2 = coef3
            e1 = e4
            d1 = d4
            f1 = f4
            coef1 = coef4
  110    continue
         yij = yij + h3
  120 continue
*   s-boundary.
*
      if (bc(1).eq.1) go to 140
      xij = a
       yij = c
      h2 = 1.
      h3 = hy(1)
      e2 = 0.
      do 130 i=1,nx
         call mbnds(alp, bet, gam, xij,z)
         h4 = 1.
         if (i.ne.nx) h4 = hx(i)
         e3 = 0.
         if (i.ne.nx) e3 = ec(xij+h4/2.,yij+h3/2.,z)
         diag(i) = diag(i) + (h2/2.*e2+h4/2.*e3)*alp/bet
         rhs(i) = rhs(i) + (h2/2.*e2+h4/2.*e3)*gam/bet
         e2 = e3
         h2 = h4
         xij = xij + h4
  130 continue
      go to 160
  140 continue
*
*   dirichlet-condition for s-boundary.
*
      ij = nx
      xij = a
      do 150 i=1,nx
         ij = ij + 1
         call mbnds(alp, bet, gam, xij,z)
         rhs(ij) = rhs(ij) - bsdiag(i)*gam
         if (i.eq.nx) go to 150
         xij = xij + hx(i)
  150 continue
  160 continue
*
*   w-boundary.
*
      xij = a
      yij = c
      d4 = 0.0
      h1 = 1.0
      h4 = hx(1)
      if (bc(2).eq.1) go to 180
      iw = 1
      do 170 i=1,ny
         call mbndw(alp, bet, gam, yij,z)
         h3 = 1.
         if (i.ne.ny) h3 = hy(i)
         d3 = 0.0
         if (i.ne.ny) d3 = dc(xij+h4/2.,yij+h3/2.,z)
         diag(iw) = diag(iw) + (h1/2.*d4+h3/2.*d3)*alp/bet
         rhs(iw) = rhs(iw) + (h1/2.*d4+h3/2.*d3)*gam/bet
         iw = iw + nx
         d4 = d3
         h1 = h3
         yij = yij + h3
  170 continue
      go to 200
  180 continue
*
*   dirichlet-condition for w-boundary.
*
      iw = 2
      do 190 i=1,ny
         call mbndw(alp, bet, gam, yij,z)
         rhs(iw) = rhs(iw) - sdiag(iw-1)*gam
         iw = iw + nx
         if (i.eq.ny) go to 190
         yij = yij + hy(i)
  190 continue
  200 continue
*
* the north-boundary..
*
      xij = a
      yij = d
      h1 = hy(ny-1)
      e1 = 0.0
      h2 = 1.0
      if (bc(3).eq.1) go to 220
      ij = nx*(ny-1) + 1
      do 210 i=1,nx
         call mbndn(alp, bet, gam, xij,z)
         h4 = 1.
         if (i.ne.nx) h4 = hx(i)
         e4 = 0.0
         if (i.ne.nx) e4 = ec(xij+h4/2.,yij-h1/2.,z)
         diag(ij) = diag(ij) + (h2/2.*e1+h4/2.*e4)*alp/bet
         rhs(ij) = rhs(ij) + (h2/2.*e1+h4/2.*e4)*gam/bet
         ij = ij + 1
         h2 = h4
         e1 = e4
         xij = xij + h4
  210 continue
      go to 240
  220 continue
      ij = nx*(ny-2)
      do 230 i=1,nx
         ij = ij + 1
         call mbndn(alp, bet, gam, xij, z)
         rhs(ij) = rhs(ij) - bsdiag(ij)*gam
         bsdiag(ij) = 0.0
         if (i.eq.nx) go to 230
         xij = xij + hx(i)
  230 continue
  240 continue
*
* east-boundary..
*
      xij = b
      yij = c
      d1 = 0.0
      h1 = 1.0
      h2 = hx(nx-1)
      if (bc(4).eq.1) go to 260
      ie = nx
      do 250 i=1,ny
         call mbnde(alp, bet, gam, yij, z)
         h3 = 1.
         if (i.ne.ny) h3 = hy(i)
         d2 = 0.0
         if ((i.ne.ny) .or. ((bc(3).eq.1) .and. (bc(1).ne.3))) d2 =
     1    dc(xij-h2/2.,yij+h3/2.,z)
         diag(ie) = diag(ie) + (h1/2.*d1+h3/2.*d2)*alp/bet
         rhs(ie) = rhs(ie) + (h1/2.*d1+h3/2.*d2)*gam/bet
         ie = ie + nx
         d1 = d2
         h1 = h3
         yij = yij + h3
  250 continue
      go to 290
*  dirichlet-condition on east-boundary..
*
  260 continue
*
      ie = nx - 1
      do 270 i=1,ny
         call mbnde(alp, bet, gam, yij, z)
         rhs(ie) = rhs(ie) - sdiag(ie)*gam
         sdiag(ie) = 0.0
         ie = ie + nx
         if (i.eq.ny) go to 270
         yij = yij + hy(i)
  270 continue
  290 continue
      nxny = nx*ny
      return
      end
      subroutine compr(nx, ny, nxny, ibc, diag, sdiag, bsdiag, rhs)
      implicit double precision (a-h,o-z)
      dimension ibc(4), diag(1), sdiag(1), bsdiag(1), rhs(1)
      if (ibc(1).ne.1) go to 20
*
*   elimination of south-boundary. system becomes nx smaller.
*
      ny = ny - 1
      nxny = nx*ny
      do 10 i=1,nxny
         inx = i + nx
         diag(i) = diag(inx)
         sdiag(i) = sdiag(inx)
         bsdiag(i) = bsdiag(inx)
         rhs(i) = rhs(inx)
   10 continue
   20 continue
      if (ibc(2).ne.1) go to 70
*
* west-boundary eliminated, system becomes ny smaller..
*
      isch = 1
      nx = nx - 1
      ij = 1
      do 60 i=1,ny
         do 50 j=1,nx
            ijisch = ij + isch
            diag(ij) = diag(ijisch)
            sdiag(ij) = sdiag(ijisch)
            bsdiag(ij) = bsdiag(ijisch)
            rhs(ij) = rhs(ijisch)
            ij = ij + 1
   50    continue
         isch = isch + 1
   60 continue
   70 continue
      if (.not.((ibc(1).eq.3) .or. (ibc(3).eq.1))) go to 100
*
* dirichlet-condition on north-boundary..
*
      ny = ny - 1
  100 continue
      if (.not.((ibc(2).eq.3) .or. (ibc(4).eq.1))) go to 130
*
* east-boundary is eliminated, system becomes ny smaller..
*
      nx = nx - 1
      isch = 1
      ij = nx + 1
      do 120 i=2,ny
         do 110 j=1,nx
            ijisch = ij + isch
            diag(ij) = diag(ijisch)
            sdiag(ij) = sdiag(ijisch)
            bsdiag(ij) = bsdiag(ijisch)
            rhs(ij) = rhs(ijisch)
            ij = ij + 1
  110    continue
         isch = isch + 1
  120 continue
  130 continue
      nxny = nx*ny
      return
      end
      subroutine indeks(mx,my,mz,n,m1,m2,l,ln,lw1,lw2,n0)
      implicit double precision (a-h,o-z)
*   written by ushiro
*   l(n),lw1(n),ln(n0),lw2(n0)
      dimension l(1),ln(1),lw1(1),lw2(1)
      n=mx*my*mz
      m1=mx
      m2=mx*my
      ln(1)=0
      n0=mx+my+mz-2
      do 20 i=1,n0
 20   lw2(i)=0
      do 60 k=0,mz-1
      do 60 j=0,my-1
      ind=k*m2+j*m1
      do 40 i=1,mx
 40   lw1(ind+i)=i+j+k
 60   continue
      do 100 j=1,n
 100  lw2(lw1(j))=lw2(lw1(j))+1
      ln(2)=ln(1)+lw2(1)
      lw2(1)=ln(1)
      do 120 i=2,n0
      ln(i+1)=ln(i)+lw2(i)
 120  lw2(i)=ln(i)
      do 140 j=1,n
      lw2(lw1(j))=lw2(lw1(j))+1
 140  l(lw2(lw1(j)))=j
      return
      end
      subroutine iccgu(mx,my,mz,b,x,r,p,ap,eps,nit,res)
      implicit double precision (a-h,o-z)
      dimension x(1),r(1),p(1),ap(1),b(1)
      dimension res(*)
      common /matrix/ a(-7000:281000,9)
      common /inde/ l(281000),lw1(281000),ln(500),lw2(500)
c  subroutine written by Ushiro
      n=mx*my*mz
      m1=mx
      m2=mx*my
      n0=mx+my+mz-2
      epe=eps*eps
       do 20 i=1,m2
      p(i)=0.
 20   ap(i)=0.
      do 19 i=1,m2
      p(i+n+m2)=0.
 19   ap(i+n+m2)=0.
      do 60 i=1,n
      x(i)=0.
 60   r(i+m2)=b(i)
      bnorm=0.
      do 65 i=1,n
 65   bnorm=bnorm+b(i)**2
      res(1)=sqrt(bnorm)
      call ldlt(n,m1,m2,r(1),p(1),l,ln,n0)
      r1=0.
      do 80 i=m2+1,n+m2
 80   r1=r1+r(i)*p(i)
      nn=nit
      do 200 k=1,nn
      call axsub(n,m1,m2,p(1),ap(1))
      r2=0.
      do 120 i=m2+1,n+m2
 120  r2=r2+p(i)*ap(i)
      alp=r1/r2
      do 140 i=1,n
      x(i)=x(i)+alp*p(i+m2)
 140  r(i+m2)=r(i+m2)-alp*ap(i+m2)
      serr=0.
      do 150 i=1,n
 150  serr=serr+r(i+m2)**2
      if (serr.lt.epe*bnorm) nit=k
      res(k+1)=sqrt(serr)
      if (k.eq.nit) return
      call ldlt(n,m1,m2,r(1),ap(1),l,ln,n0)
      r3=0.
      do 160 i=1+m2,n+m2
 160  r3=r3+r(i)*ap(i)
      beta=r3/r1
      r1=r3
      do 180 i=1+m2,n+m2
 180  p(i)=ap(i)+beta*p(i)
 200  continue
      return
      end
      subroutine axsub(n,m1,m2,x,y)
      implicit double precision (a-h,o-z)
      dimension x(1-m2:n+m2),y(1-m2:n+m2)
      common /matrix/ a(-7000:281000,9)
c   subroutine written by Ushiro
      do 100 i=1,n
 100  y(i)=a(i-m2,4)*x(i-m2)+a(i-m1,3)*x(i-m1)+a(i-1,2)*x(i-1)+
     1   a(i,1)*x(i)+a(i,2)*x(i+1)+a(i,3)*x(i+m1)+a(i,4)*x(i+m2)
      return
      end
      subroutine ldlt(n,m1,m2,r,px,l,ln,n0)
      implicit double precision (a-h,o-z)
      dimension r(1-m2:n+m2),px(1-m2:n+m2)
      dimension l(*),ln(*)
      common /matrix/ a(-7000:281000,9)
c  subroutine written by Ushiro
      do 100 j=1,n0
      is=ln(j)+1
      ie=ln(j+1)
c$dir no_recurrence
      do 50 k=is,ie
      i=l(k)
 50   px(i)=r(i)-a(i-1,2)*px(i-1)-a(i-m1,3)*px(i-m1)-a(i-m2,4)*px(i-m2)
 100  continue
      do 200 j=n0-1,1,-1
      is=ln(j)+1
      ie=ln(j+1)
c$dir no_recurrence
      do 150 k=is,ie
      i=l(k)
 150  px(i)=px(i)-a(i,2)*px(i+1)-a(i,3)*px(i+m1)-a(i,4)*px(i+m2)
 200  continue
      return
      end
      function second(t)
      implicit double precision (a-h,o-z)
c second(t) should deliver the accumulated CPU-time in seconds.
c to be replaced by appropriate function or call
      second=cputime(0.)
      return
      end
      function wall(t)
      implicit double precision (a-h,o-z)
      real *4 time
c  wct delivers the accumulated wall clock time in seconds (for CONVEX C-2)
c  to be replaced by appropriate call
      call wct(time)
      wall=time
      return
      end