program mglab
c ********************** MULTIGRID LABORATORY *****************************
c *                                                                       *
c *            Tutorial software to accompany the book:                   *
c *                                                                       *
c *              AN INTRODUCTION TO MULTIGRID METHODS                     *
c *                      by P.Wesseling                                   *
c *                  Wiley, Chichester, 1992                              *
c *************************************************************************
c *                       Main program                                    *
c *************************************************************************
c
c  Programmer    P.Wesseling
c  Version 1.2   date 18-04-92
c  Version 1.1   date 16-12-91  (F.N. van de Vosse: first revision)
c  Version 1.0   date 10-12-91  (developed at HP Vectra RS/20C
c
c  Copyright (c) 1992  P.Wesseling
c  Address:  Delft University of Technology
c            Department of Technical Mathematics and Informatics
c            Mekelweg 4,  2628 CD Delft,  The Netherlands
c            Telefax +3115 787209
c            email witawes@dutinfh.tudelft.nl
c  Permission to copy or distribute this software in hard copy or soft copy
c  is hereby granted by the programmer.
c
c **************************************************************************
c
c  Multigrid method for the solution of the following boundary value problem:
c    -(ay')' + (by)' + cy = s,   0 < x < 1,  a > 0
c  Nonlinear multigrid algorithm as described in section 8.7 of the book.
c  The problem solved is only one-dimensional, and the purpose of this
c  software is purely tutorial.
c
c **************************************************************************
c
c                  INPUT / OUTPUT PARAMETERS
c
      implicit none
      integer  histry(2,1000), ib(13), icase,  igal, ihis,
     $         iout(4), ism, it, levels, nc, nmg, np(11), ns(2)
      double precision aa,bb, cc, delta, f0, f1, om, spar, ss, tol
      integer idim
      parameter (idim = 1000)
      double precision amat(idim,3),dmat(idim,3),r(idim),u(idim),
     $                 ue(idim), wu(10), rhs(idim), s(10), ut(idim),
     $                 v(idim), uce(idim)
      character cycle
c **************************************************************************
c
c  aa        i  governs the diffusion coefficient in the diff. equation
c               see function a
c  amat      o  matrix (discretization of differential equation)
c                 igal=0: matrices on all grids created by matrhs
c                 igal=1: matrix on finest grid created by subroutine matrhs
c                         and initialized to 0.d0 on the other grids, where the
c                         matrices will be made by subroutine galerk
c  bb        i  governs the velocity coefficient in the diff. equation
c               see function b
c  cc        i  governs the third coefficient in the diff. equation
c               see function c
c  cycle     i   governs type of multigrid schedule
c                 cycle = 'A' : adaptive schedule
c                 cycle = 'F' : F-cycle
c                 cycle = 'V' : V-cycle
c                 cycle = 'W' : W-cycle
c  delta     i  used in setting tolerance eps in adaptive cycle according
c                to equation (8.3.1) in the book
c  dmat      o  matrix used when defect correction is applied: more accurate
c                discretization than amat
c  f0        i  value for dirichlet or neumann boundary condition at x = 0
c  f1        i  value for dirichlet or neumann boundary condition at x = 1
c  histry    o  gives sequence of grids visited and number of smoothings
c                 carried out during each visit of a grid
c                 histry(1,i) : number of grid visited
c                 histry(2,i) : number of smoothings carried out at
c                   visit to grid(histry(1,i))
c  ib        i  ib(1) = 1: Dirichlet at x = 0
c               ib(1) = 2: Neumann   at x = 0
c               ib(2) = 1: Dirichlet at x = 1
c               ib(2) = 2: Neumann   at x = 1
c               ib(3) = 1: central
c               ib(3) = 2: upwind
c               ib(3) = 3: hybrid with diffusion term deleted when switched to
c                          upwind
c               ib(3) = 4: hybrid with diffusion term maintained when switched
c                          to upwind
c               ib(4) = 1: vertex-centered
c               ib(4) = 2: cell-centered
c               ib(5) governs updating of ut in nonlinear multigrid method
c                 implemented in subroutine mg
c               ib(6) chooses type of prolongation - see subroutine prol:
c                 ic = ib(6)
c               ib(7) chooses type of restriction for residual - see
c                 subroutine restri: ic = ib(7)
c               ib(8) chooses type of restriction for ut - see
c                 subroutine restri: ic = ib(8)
c               ib(9) = 1: direct solver is used instead of multigrid
c               ib(10) = n>0: defect correction is applied n times
c               ib(11) = n>0: nested iteration is applied with gamma~=n
c                 multigrid iterations (see program 1 in section 8.4)
c               ib(12) chooses type of prolongation in nested iteration
c               ib(13) chooses type of restriction for coarse right-hand-
c                 sides in nested iteration
c  icase     i  identification number given by user to his problem
c                icase = 1: interface problem: eq. (3.3.1) with
c                           discontinuous diffusion coefficient a(x)
c                           defined in function a
c                icase = 2: convection diffusion eq. (6.6.1) (1-dimensional)
c  idim      *  length of arrays reserved for matrix and grid vectors on
c                all grids together
c  igal      i  igal = 0: coarse grid discretization; matrices are made by
c                       discretization on all grids
c               igal = 1: Galerkin coarse grid discretization; only finest
c                       grid matrix is made and coarse grid matrices are
c                       initialized to 0.d0 before call of Galerkin
c  ihis      o   total number of entries in histry
c  iout      i   governs amount of output; iout larger -> more output
c                 iout(1) governs output in main program  mglab
c                 iout(2) and iout(3) govern output in subroutine mg
c                 iout(4) governs output in subroutines matrhs and galerk
c  ism       i   chooses smoothing method; see subroutine smoothing
c  it        o   gives current multigrid iteration number
c  levels    i   number of grids in multigrid method; < 10
c  nc        i   number of grid points on coarsest grid
c  nmg       i   maximum number of multigrid iterations
c  np        o   array of pointers to start of grids and to 1+end of finest grid
c                 np(1)=1     : start of coarsest grid
c                 np(levels)  : start of finest grid
c                 np(levels+1): end of finest grid + 1
c  ns        i   ns(1):  number of pre-smoothing iteraties
c                ns(2):  number of post-smoothing iterations
c  om        i  underrelaxation parameter in smoothing method
c  rhs       o  right-hand-side of discretization on finest grid made by
c                subroutine matrhs
c  s        i   underrelaxation parameters in the nonlinear multigrid
c                 algorithm. See algorithm TG in section 8.2 of the book.
c  spar      i  used in generating underrelaxation parameters s(k) in
c               the nonlinear multigrid algorithm. See algorithm TG in
c               section 8.2 of the book.
c  ss        i  governs the right-hand-side in the diff. equation
c               see function s
c  tol       i  accuracy requirement:  |Au-rhs)| < tol*|rhs| on finest grid.
c                Governs also termination ofcoarse grid correction in
c                adaptive cycle
c  u         o  new approximation of solution
c  uce       o  exact solution of differential equation
c  ue        o  exact discrete solution
c  ut        i  u~ in nonlinear multigrid algorithm.
c                 On coarse grids input is optional.
c                 It may or may not be updated during multigrid iteration
c                 depending on ib(5) = 1 : ut is updated else not.
c                 For linear problems the treatment of ut makes no difference.
c                 If ut is input on coarse grids it may have been obtained
c                 from nested iteration
c  v         o  initial approximation of solution
c  wu        o  wu(k) = work units expended on grid k
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i1, i2, ibb
      double precision anorm,bdef(idim),errmax,errl2,rdef(idim),ub(idim)
c
c  anorm     name of function that computes norms
c  bdef      scratch array for defect correction
c  errmax    scratch variable
c  err12     scratch variable
c  i1        input device number
c  i2        output device number
c  ibb       scratch variable
c  rdef      right-hand-side for second order discretization (def. corr.)
c  ub        scratch array for defect correction
c
c ***************************************************************************
c
c                   I/O
c
c  input file:  mglab.dat  device number = i1   free format
c  output file: mglab.res  device number = i2   free format
c
c **************************************************************************
c
c            SUBROUTINES CALLED BY MAIN PROGRAM
c
c  axplsy      u = a*ut + u;  a:real,  u, ut: gridfunctions
c  copy        copies gridfunctions
c  defres      computes residual for defect correction
c  dirsol      computes solution by direct method
c  err         puts out error messages
c  galerk      galerkin coarse grid approximation
c  info        user-provided subroutine that prints information about
c                his case; body may be empty
c  inout       machine dependent subroutine that opens input and output
c                files
c  input       reads and writes input
c  matrhs      computes matrix and right-hand-side on finest grid and matrices
c              on coarse grid in case of discretization coarse grid
c              approximation
c  mg          multigrid algorithm
c  nested      nested iteration: program 1 of section 8.4
c  pointr      computes pointers to starting addresses of grids
c  smooth      smoothing
c  uexact      computes exact solution of differential equation
c  vecout      prints gridfunctions
c
c ***************************************************************************
c
c                   FUNCTIONS CALLED BY MAIN PROGRAM
c
c                    none
c
c ***************************************************************************
c
c                 COMPLETE LIST OF SUBROUTINES
c
c       axplsy    chrfun    copy      defres    defrp1    diagn     dirsol
c       err       errmg     galerk    gener     ibchck    info      inout
c       input     matout    matrhs    mg        nested    pointr    prol
c       resid     restri    scal      smooth    vecout
c
c ***************************************************************************
c
c                 COMPLETE LIST OF FUNCTIONS
c
c       a    anorm   b     c    omega    s      u
c
c ***************************************************************************
c
c                METHOD
c
c  Multigrid method for the solution of the following boundary value
c    problem:
c    -(ay')' + (by)' + cy = s,   0<x<1, a>0
c
c  Nonlinear multigrid algorithm as described in section 8.7 of the book.
c
c  The diffusion coefficient a(x) is supposed to be possibly discontinuous
c    at grid points. The functions b(x) and c(x) are supposed to be possibly
c    discontinuous in the middle between at grid points. The user may put
c    discontinuities elsewhere (and will often have to on some coarse grids
c    if igal = 0) at his own risk.
c
c  Finite volume discretization of 1-dimensional second order boundary
c    value problem.
c  Upwind, central or hybrid discretization of convection (first order)
c    term, depending on ib(3).
c  Cell-centered or vertex-centered discretization, depending on ib(4).
c
c*************************************************************************
c
c                      STRUCTURE DIAGRAM
c
c  Statements that are not represented in the structure diagram are bracketed
c    by c1**** and c2*****
c
c #===========================================================================#
c #  matrhs                                                                   #
c #___________________________________________________________________________#
c # \                                                                       / #
c # F \                           ib (9) eq 1                             / T #
c #_____\_______________________________________________________________/_____#
c # \                                   /|                                    #
c # F \         igal eq 1             / T| exact solution of differential eq. #
c #_____\___________________________/____|                                    #
c #                   |  galerk          | direct solution of discrete system #
c #___________________|__________________|                                    #
c #   ut = v = 0                         | smoothing on finest grid only      #                                   |
c #______________________________________|                                    #
c # \                                   /|                                    #
c # F \         ib (11) > 0           / T|                                    #
c #_____\___________________________/____|                                    #
c #                   | nested iteration |                                    #
c #___________________|__________________|                                    #
c # \                                   /|                                    #
c # F \         ib (10) eq 0          / T|                                    #
c #_____\___________________________/____|                                    #
c # central discret.  | direct solution  |                                    #
c # defect correction | multigrid        |                                    #
c #===========================================================================#
c
c******************************************************************************
      parameter( i1=11, i2=12 )
c1*********************************
      write(*,*)
     $ '                ************ MULTIGRID LABORATORY ************'
      write(*,*) 
     $ '                *   Tutorial software to accompany the book  *'
      write(*,*) 
     $ '                *    AN INTRODUCTION TO MULTIGRID METHODS    *'
      write(*,*) 
     $ '                *              by P.Wesseling                *'
      write(*,*) 
     $ '                *          Wiley, Chichester, 1991           *'
      write(*,*) 
     $ '                **********************************************'
      write(*,*) ' '
      write(*,*) ' '
c
c open the in- and output files
c
      call inout( i1, i2 )
c
c read the input
c
      call input( i1, i2, ib, icase, igal, iout, ism, levels, nc, nmg, 
     $            ns, aa, bb, cc, delta, f0, f1, om, spar, ss, tol, 
     $            cycle)
c
c close the input file
c
      close( i1 )
c
c call user-provided subroutine that prints information about his case
c
      call info( icase, ib )
c
c set pointers to starting adresses of grids
c
      call pointr( ib(4), iout(1), levels, nc, np, i2 )
c
c check of workspace reserved for amat
c
      if ( np(levels+1) .gt. idim) then
         call err( i1, i2, 4 )
      endif
c
c compute matrix and right-hand-side on finest grid and matrices
c on coarse grid in case of discretization coarse grid approximation
c
c2*********************************
      call matrhs( ib, icase, idim, igal, iout(4), i2, levels, nc,
     $             np, aa, amat, bb, cc, f0, f1, rhs, ss)
c
c if ib(9)=1  a single grid solution method is applied else multigrid solution
c
      if ( ib(9) .ne. 1 ) then
c
c galerkin coarse grid approximation if igal=1
c
         if ( igal .eq. 1 ) then
            call galerk( ib, idim, iout(4), i2, levels, np, amat, 1 )
         endif
c
c initialization of solution and exact solution
c
         do 5 ibb = 1, np(levels+1)
           ut(ibb) = 0d0
           ue(ibb) = 0d0
    5    continue
         do 6 ibb = np(levels), np(levels+1) - 1
           v(ibb) = ut(ibb)
    6    continue
c1*********************************
         ihis = 0
         do 7 ibb = 1, 10
           wu(ibb) = 0.d0
    7    continue
c2*********************************
c
c nested iteration if ib(11)>0
c
         if ( ib(11) .gt. 0 ) then
            call nested ( amat, rhs, cycle, delta, histry, ib, idim,
     $                    ihis, iout, it, ism, i2, levels, nmg, np, ns, 
     $                    om, r, s, tol, u, ue, ut, v, wu )
c1***************************************************************
c
c Output of results
c
            if (iout(1) .gt. 1) then
        write(i2,*)'Multigrid solution on finest grid after nested it.'
              call vecout( i2, idim, levels, np, u )
            endif
c2***************************************************************
         endif
         if ( ib(10) .eq. 0 ) then
c
c  calculate the exact discrete solution on the finest grid
c
            call dirsol( idim, i2, levels, np, amat, ue, rhs )
c1***************************************************************
            if (iout(1) .gt. 0) then
              write(i2,*) 'Exact discrete solution on finest grid'
              call vecout( i2, idim, levels, np, ue )
            endif
c2***************************************************************
            call mg( amat, rhs, cycle, delta, histry, ib, idim, ihis,
     $               iout, it, ism, i2, levels, nmg, np, ns, om, r, s, 
     $               tol, u, ue, ut, v, wu )

         else
c
c defect correction on central discretization
c
            ibb = ib(3)
            ib(3) = 1
            call matrhs( ib, icase, idim, 1, iout(4), i2, levels, nc,
     $                   np, aa, dmat, bb, cc, f0, f1, rdef, ss )
            ib(3) = ibb
c
c  calculate the exact discrete solution on the finest grid
c
            call dirsol( idim, i2, levels, np, dmat, ue, rdef )
c1***************************************************************
            if (iout(1) .gt. 0) then
              write(i2,*) 'Exact discrete solution on finest grid'
              call vecout( i2, idim, levels, np, ue )
            endif
c2***************************************************************
c
c defect correction according to section 4.6 of the book
c one multigrid iteration is applied
c
            call mg( amat, rhs, cycle, delta, histry, ib, idim, ihis,
     $               iout, it, ism, i2, levels, 1, np, ns, om, r, s, 
     $               tol, ub, ue, ut, v, wu )
            do 10 ibb = 1, ib(10)
               call defres( idim, iout(4), i2, levels, np, amat, dmat,
     $          rhs, bdef, ub)
               call copy( idim, levels, np, v, ub )
               call mg( amat, bdef, cycle, delta, histry, ib, idim, 
     $                  ihis, iout, it, ism, i2, levels, 1, np, ns, om, 
     $                  r, s, tol, ub, ue, ut, v, wu )
   10       continue
            call copy( idim, levels, np, u, ub )
         endif
c1***************************************************************
c
c Output of results
c
            if (iout(1) .gt. 1) then
              write(i2,*) 'Multigrid solution on finest grid'
              call vecout( i2, idim, levels, np, u )
            endif
            call axplsy( idim, levels, np, -1d0, ue, u )
            if (iout(1) .gt. 0) then
              write(i2,*) 'Local error on finest grid'
              call vecout( i2, idim, levels, np, u )
            endif
            errmax = anorm( 1, idim, u, levels, np )
            errl2 = anorm( 2, idim, u, levels, np )
            write(i2,'(a,2e13.5)')
     $       'Maximum and L2 norm of the error = ', errmax, errl2
            write(*,'(a,2e13.5)')
     $       'Maximum and L2 norm of the error = ', errmax, errl2
            if (iout(1) .gt. 1) then
              write(i2,*) 'Work unit distribution over the grids'
              do 11 ibb = 1,levels
                 write(i2,'(i3,e10.2)') ibb, wu(ibb)
  11          continue
            endif
c2***************************************************************
      else
c
c computation of the exact solution of the differential equation
c
         call uexact( i2, icase, idim, levels, ib,np,uce,f0,f1,aa,bb)
c1***************************************************************
         if (iout(1) .gt. 0) then
           write(i2,*) 'Exact continuous solution on finest grid'
           call vecout( i2, idim, levels, np, uce )
         endif
c2***************************************************************
c
c computation of exact discrete solution by a direct method
c
         call dirsol( idim, i2, levels, np, amat, ue, rhs )
c1***************************************************************
         if (iout(1) .gt. 0) then
           write(i2,*) 'Direct solver solution on finest grid'
           call vecout( i2, idim, levels, np, ue )
         endif
         call axplsy( idim, levels, np, -1d0, ue, uce )
         if (iout(1) .gt. 0) then
           write(i2,*) 'Local direct solver error on finest grid'
           call vecout( i2, idim, levels, np, uce )
         endif
         errmax = anorm( 1, idim, uce, levels, np )
         errl2 = anorm( 2, idim, uce, levels, np )
         write(i2,'(a,2e13.5)') 
     $         'Maximum and L2 norm of the error = ',errmax, errl2
         write(*,'(a,2e13.5)') 
     $         'Maximum and L2 norm of the error = ',errmax, errl2
c2***************************************************************
c
c test the smoother and the discretization error
c
         do 15 ibb = np(levels), np(levels+1) -1
           v(ibb) = 0d0
   15    continue
         call smooth( i2, idim, iout, ism, levels, np, ns(1), amat,
     $                levels, om, rhs, v, u )
c1***************************************************************
         write(i2,*) '  Solution by smoother on finest grid'
         call vecout( i2, idim, levels, np, u )
         call axplsy( idim, levels, np, -1d0, ue, u )
        write(i2,*)' Error on finest grid after solution with smoother'
         call vecout( i2, idim, levels, np, u )
         errmax = anorm( 1, idim, u, levels, np )
         errl2 = anorm( 2, idim, u, levels, np )
         write(i2,'(a,2e13.5)') 
     $         'Maximum and L2 norm of the error = ',errmax, errl2
         write(*,'(a,2e13.5)') 
     $         'Maximum and L2 norm of the error = ',errmax, errl2
      endif
      write(*,*)'                         **************************'
      write(*,*)'                          results are in mglab.res'
      write(*,*)'                         **************************'
      close(i2)
c2***************************************************************
      end
      double precision function a(icase,aa,x)
c***************************************************************
c
c User defined function to set the diffusion coefficient
c
c***************************************************************
      integer icase
      double precision aa,x,y
      if ( icase .eq. 1 ) then
c
c interface problem according to eqs. (3.3.1) and (3.3.6); xstar=aa,
c epsilon = 1d-3
c
         if ( x. le. aa) then
            y = 1d-3
         else
            y= 1d0
         endif
         a = y
      else if ( icase .eq. 2 ) then
c
c convection-diffusion problem
c
         a = aa
      else
         a = aa
      endif
      end
      double precision function anorm( ic, idim, b, k, np )
c **************************************************************************
c
c     computes norms of vector b on grid k
c         ic = 1:    maximum norm
c         ic = 2:    l2-norm scaled by dimension of b
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer ic, idim, k, np(10)
      double precision b(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
      double precision a
c ***************************************************************************
c
c                 I/O
c                none
c
c ***************************************************************************
c
c            FUNCTIONS
c
c              none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c               none
c
c ***************************************************************************
c
c            METHOD
c
c   Can be done with BLAS subroutines IDAMAX, DNRM2 and SCAL; has not been
c   implemented
c
c ***************************************************************************
      a = 0.d0
      if ( ic .eq. 1 ) then
         do 10 i = np(k), np(k+1) - 1
            a = max(a, abs(b(i)))
  10     continue
         anorm = a
      else if ( ic .eq. 2 ) then
         do 20 i = np(k), np(k+1)-1
            a = a + b(i)*b(i)
  20     continue
         anorm = sqrt(a/(np(k+1)-np(k)))
      endif
c     end of anorm
      end
      subroutine axplsy( idim, k, np, a, ut, u )
c***********************************************************************
c
c     u = a*ut + u    on grid k
c                     structure analogous to BLAS routine DAXPY
c
c ****************************************************************************
c
c                  INPUT/OUTPUT PARAMETERS
c
      implicit none
      integer idim, k, np(11)
      double precision a, u(idim), ut(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c   BLAS routine DAXPY may be used on vector computers - not implemented
c
c ***************************************************************************
      do 10 i = np(k), np(k+1)-1
         u(i) = u(i) + a*ut(i)
  10  continue
c     end of axplusy
      end
      double precision function b(icase,bb,x)
c ***************************************************************************
c
c User defined function to set the velocity coefficient
c
c ***************************************************************************
      integer icase
      double precision bb,x
      if (icase .eq. 1) then
         b = bb
      else if ( icase .eq. 2 ) then
         b = bb
      else
         b = bb
      endif
      end
      double precision function c(icase,cc,x)
c ***************************************************************************
c
c User defined subroutine to set the third coefficient
c
c ***************************************************************************
      implicit none
      integer icase
      double precision cc,x
      if (icase .eq. 1) then
         c = cc
      else if ( icase .eq. 2 ) then
         c = cc
      else
         c = cc
      endif
      end
      subroutine chrfun( i2, ib, ca, cab, cp, cr, scpmtx )
c******************************************************************************
c
c  This subroutine defines the characteristic functions of A, A(bar), P and R,
c  and computes the lower and upper bound of the structure of A on coaser grids
c  according to the choice of P and R
c  Used in Galerkin coarse grid approximation
c
c******************************************************************************
c
c  Input/Output parameters
c
      implicit none
      integer i2, ib(10), ca(-1:1), cab(-1:1), cp(-2:1),
     $        cr(-2:1), scpmtx
c******************************************************************************
c
c  i2      i   logical output device number
c  ib      i   for further information, see subroutine galerk
c  ca      o   characteristic function of fine grid operator A _
c  cab     o   characteristic function of coarse grid operator A
c  cp      o   characteristic function of prolongation operator P
c  cr      o   characteristic function of restriction operator R
c  scpmtx  o/i -scpmtx and scpmtx are the assumed lower and upper bound of the
c              structure of the matrix A. In our cases here, for every possible
c              combination of P and R, scpmtx has only two possible values:
c              either 1 or 2.
c
c******************************************************************************
c
c  Local quantities
      integer i
c
c  i           do-loop variable 
c
c******************************************************************************
c
c Initialization
c
      do 5 i=-2,1
         cr(i)=0
         cp(i)=0
 5    continue
      do 6 i=-1,1
         ca(i)=0
         cab(i)=0
 6    continue
c
c Define cr, cp, ca and cab according to ib(6) and ib(7)
c
      if ( ib(6) .eq. 1 ) then
         cp(-1)=1
         cp(0)=1
         cp(1)=1
      else if ( ib(6) .eq. 2 ) then
         cp(-1)=1
         cp(0)=1
      else if ( ib(6) .eq. 3 .or. ib(6) .eq. 4 ) then
         cp(-2)=1
         cp(-1)=1
         cp(0)=1
         cp(1)=1
      else
         write(i2,*) 'chrfun : no such restriction available'
         stop
      end if
c
      if ( ib(7) .eq. 1 .or. ib(7) .eq. 2 ) then
         cr(-1)=1
         cr(0)=1
         cr(1)=1
      else if ( ib(7) .eq. 3 ) then
         cr(-1)=1
         cr(0)=1
      else if ( ib(7) .eq. 4 .or. ib(7) .eq. 5 ) then
         cr(-2)=1
         cr(-1)=1
         cr(0)=1
         cr(1)=1
      else
         write(i2,*) 'chrfun: no such prolongation available'
         stop
      end if
c
      do 10 i=-scpmtx,scpmtx
         ca(i)=1
 10   continue
c
c If enlargement of stencil occurs, then scpmtx=2
c
      if ( ib(6) .eq. 3 .or. ib(6) .eq. 4 ) then
         if ( ib(7) .ne. 3 ) scpmtx=2
      end if
      do 15 i=-scpmtx,scpmtx
         cab(i)=1
 15   continue
c
c end of subroutine chrfun
c
      return
      end
      subroutine copy( idim, k, np, u, v )
c***********************************************************************
c
c     u = v    on grid k
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, k, np(11)
      double precision u(idim), v(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c   BLAS routine DCOPY may be used on vector computers - has been
c     commented out with cv
c
c ***************************************************************************
      do 10 i = np(k), np(k+1)-1
         u(i) = v(i)
  10  continue
c
c  vector version
c
cv      call DCOPY (np(k+1) - np(k),u(np(k)),1,v(np(k)),1)
c     end of copy
      end
      subroutine defres( idim, io, i2, k, np, amat, dmat, b, bdef, ub )
c **************************************************************************
c
c     residual computation for defect correction according to section 4.6
c     of the book
c
c     bdef = b - dmat*ub + amat*ub            on grid k
c
c     no attempt has been made to emulate BLAS routines
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, io, i2, k, np(11)
      double precision amat(idim,3), dmat(idim,3), b(idim), bdef(idim),
     $                 ub(idim)
c
c  amat    less accurate discretization matrix
c  dmat    more accurate discretization matrix
c  b       right-hand-side of more accurate discretization
c  bdef    defect correction residual
c  io      governs output
c  i2      output device number
c  ub      solution to which defect correction is applied
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
c
c ***************************************************************************
      do 5 i = np(k), np(k+1)-1
         bdef(i) = b(i) - (dmat(i,2)-amat(i,2))*ub(i)
    5 continue
      do 6 i = np(k), np(k+1)-2
         bdef(i) = bdef(i) - (dmat(i,3)-amat(i,3))*ub(i+1)
    6 continue
      do 7 i = np(k)+1, np(k+1)-1
         bdef(i) = bdef(i) - (dmat(i,1)-amat(i,1))*ub(i-1)
    7 continue
      if ( io .gt. 2 ) then
         write(i2,*)'Right hand side for defect correction'
         call vecout( i2, idim, k, np, bdef )
      endif
c     end of defres
      end
      subroutine defrp1( i2, ib, p, r )
c
c******************************************************************************
c
c  Subroutine defrp1 defines operator-independent prolongation and restriction
c  Used in Galerkin coarse grid approximation
c
c******************************************************************************
c
c  Input/Output
c
      implicit none
      integer i2, ib(10)
      double precision p(-2:1,3), r(-2:1,3)
c******************************************************************************
c
c  i2     i   logical output device number
c  ib     i   for further information, see subroutine galerk
c  p      o   array (-2:1,1:3), stores the elements of prolongation operator;
c             the operator for the interior points are stored in p(*,2); that 
c             for the left boundary in p(*,1) and that for the right boundary 
c             in  p(*,3)
c  r      o   array (-2:1,1:3), stores the elements of restriction operator;
c             the storage arrangement is similar to p
c
c ***************************************************************************
c
c  LOCAL QUANTITIES
c
      integer idx1, idx2
c
c  idx1,idx2,  scratch variables
c
c ***************************************************************************
c
c Initialization
c
      do 6 idx1 = 1, 3
         do 7 idx2 = -2, 1
            p(idx2,idx1) = 0d0
            r(idx2,idx1) = 0d0
    7    continue
    6 continue
c
c Define prolongation operator
c
      if ( ib(6) .eq. 1 ) then
         p(-1,1)=0d0
         p(0,1)=1d0
         p(1,1)=.5d0
         p(-1,2)=.5d0
         p(0,2)=1.d0
         p(1,2)=.5d0
         p(-1,3)=.5d0
         p(0,3)=1.d0
         p(1,3)=0.d0
      else if ( ib(6) .eq. 2 ) then
         p(-1,1)=1.d0
         p(0,1)=1.d0
         p(-1,2)=1.d0
         p(0,2)=1.d0
         p(-1,3)=1.d0
         p(0,3)=1.d0
      else if ( ib(6) .eq. 3 ) then
         p(-2,1)=0.d0
         p(-1,1)=.5d0
         p(0,1)=.75d0
         p(1,1)=.25d0
         p(-2,2)=.25d0
         p(-1,2)=.75d0
         p(0,2)=.75d0
         p(1,2)=.25d0
         p(-2,3)=.25d0
         p(-1,3)=.75d0
         p(0,3)=.5d0
         p(1,3)=0.d0
      else if ( ib(6) .eq. 4 ) then
         p(-2,1)=0.d0
         p(-1,1)=1.d0
         p(0,1)=.75d0
         p(1,1)=.25d0
         p(-2,2)=.25d0
         p(-1,2)=.75d0
         p(0,2)=.75d0
         p(1,2)=.25d0
         p(-2,3)=.25d0
         p(-1,3)=.75d0
         p(0,3)=1.d0
         p(1,3)=0.d0
      else
        write(i2,*) 'defrp1: ib(6) wrong, this option not implemented'
        stop
      end if
c
c  Define restriction operator
c
      if ( ib(7) .eq. 1 ) then
         r(-1,1)=0.d0
         r(0,1)=.5d0
         r(1,1)=.25d0
         r(-1,2)=.5d0
         r(0,2)=1.d0
         r(1,2)=.5d0
         r(-1,3)=.25d0
         r(0,3)=.5d0
         r(1,3)=0.d0
      else if ( ib(7) .eq. 2 ) then
         r(-1,1)=0.d0
         r(0,1)=1.5d0
         r(1,1)=.5d0
         r(-1,2)=.5d0
         r(0,2)=1.d0
         r(1,2)=.5d0
         r(-1,3)=.5d0
         r(0,3)=1.5d0
         r(1,3)=0.d0
      else if ( ib(7) .eq. 3 ) then
         r(-1,1)=1.d0
         r(0,1)=1.d0
         r(-1,2)=1.d0
         r(0,2)=1.d0
         r(-1,3)=1.d0
         r(0,3)=1.d0
      else if ( ib(7) .eq. 4 ) then
         r(-2,1)=0.d0
         r(-1,1)=.75d0
         r(0,1)=.75d0
         r(1,1)=.25d0
         r(-2,2)=.25d0
         r(-1,2)=.75d0
         r(0,2)=.75d0
         r(1,2)=.25d0
         r(-2,3)=.25d0
         r(-1,3)=.75d0
         r(0,3)=.75d0
         r(1,3)=0.d0
      else if ( ib(7) .eq. 5 ) then
         r(-2,1)=0.d0
         r(-1,1)=1.d0
         r(0,1)=0.75d0
         r(1,1)=.25d0
         r(-2,2)=.25d0
         r(-1,2)=.75d0
         r(0,2)=.75d0
         r(1,2)=.25d0
         r(-2,3)=.25d0
         r(-1,3)=.75d0
         r(0,3)=1.d0
         r(1,3)=0.d0
      else
         write(i2,*) 'defrp1: ib(7) wrong, this option not implemented'
         stop
      end if
c
c end of subroutine defrp1
      return
      end
      subroutine diagn( i, idim, i2, j, k, np, v )
c***********************************************************************
c
c     performs diagnostic output
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer i, idim, i2, j, k, np(10)
      double precision v(idim)
c
c   i     i    chooses text for output
c   j     i    determines amount of output
c   k     i    grid level
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      double precision a1, a2, anorm
c ***************************************************************************
c
c                 I/O
c     output to logical unit number i2
c
c ***************************************************************************
c
c            FUNCTIONS
c
c              none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c               vecout
c
c ***************************************************************************
      write(i2,*)' diagnostic #',i
      if ( i .eq. 1 ) then
         if ( j .gt. 1 ) then
            write(i2,*) ' maxnorm, l2-norm of right-hand-side on',
     $                  ' coarsest grid (level',k,')'
            a1 = anorm( 1, idim, v, k, np )
            a2 = anorm( 2, idim, v, k, np )
            write(i2,'(2e11.3)') a1, a2
         endif
         if ( j .gt. 2 ) then
            write(i2,*)'  right-hand-side on coarsest grid (level',k,')'
            call vecout( i2, idim, k, np, v )
         endif
      else if ( i .eq. 2 ) then
         write(i2,*)' maxnorm, l2-norm of residual after direct ',
     $              'solution'
         write(i2,*)' on coarsest grid (level=',k,')'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write(i2,'(2e11.3)') a1, a2
      else if ( i .eq. 3 ) then
         write(i2,*)' current solution',
     $              ' just before move to finer grid (level',k,')'
         call vecout( i2, idim, k, np, v )
      else if ( i .eq. 4 ) then
         write(i2,*)' correction',
     $              ' just before move to finer grid (level',k,')'
         call vecout( i2, idim, k, np, v )
      else if ( i .eq. 5 ) then
         write(i2,*)' prolongated correction',
     $              ' just after move to finer grid (level',k,')'
         call vecout( i2, idim, k, np, v )
      else if ( i .eq. 6 ) then
         write(i2,*)' maxnorm, l2-norm of residual',
     $              ' after coarse grid (level',k,')'
         write(i2,*)' correction'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write(i2,'(2e11.3)') a1, a2
      else if ( i .eq. 7 ) then
         write(i2,*)' current solution',
     $              ' after coarse grid correction (level',k,')'
         call vecout( i2, idim, k, np, v )
      else if ( i .eq. 8 ) then
         write(i2,*)' maxnorm, l2-norm of residual',
     $              ' after post-smoothing (level',k,')'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write(i2,'(2e11.3)') a1, a2
      else if ( i .eq. 9 ) then
         write(i2,*)' maxnorm, l2-norm of residual',
     $              ' before pre-smoothing (level',k,')'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write(i2,'(2e11.3)') a1, a2
      else if ( i .eq. 10 ) then
         write(i2,*)' maxnorm, l2-norm of residual',
     $              ' after pre-smoothing (level',k,')'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write(i2,'(2e11.3)') a1, a2
      else if ( i .eq. 11 ) then
         write(i2,*)' maxnorm, l2-norm of error',
     $              ' at moment of leaving finest grid (level',k,')'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write(i2,'(2e11.3)') a1, a2
      else if ( i .eq. 12 ) then
         write(i2,*)' maxnorm, l2-norm of final error'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write( i2,'(2e11.3)') a1, a2
      else if ( i .eq. 13 ) then
         write(i2,*)' maxnorm, l2-norm (level',k,')'
         a1 = anorm( 1, idim, v, k, np )
         a2 = anorm( 2, idim, v, k, np )
         write( i2,'(2e11.3)') a1, a2
      else
      endif
c     end of diagn
      end
      subroutine dirsol( idim, i2, k, np, amat, u, b )
c **************************************************************************
c
c     computes solution of amat*u = b   on grid k by direct solution method
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, i2, k, np(11)
      double precision amat(idim,3), u(idim), b(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer j0, j, n, nmax
      parameter (nmax = 1000)
      double precision bet, gam(nmax)
      if ( idim .gt. nmax ) then
         write(i2,*)' dirsol: nmax < idim '
         stop
      endif
      j0 = np(k)-1
      n = np(k+1) - np(k)
      bet = amat(j0+1,2)
      if ( bet .eq. 0d0 ) then
         write(i2,*) 'dirsol: zero pivot found in row ',1
         write(i2,*) '        take a look at your matrix'
         stop
      endif
      u(j0+1) = b(j0+1)/bet
      do 10 j = j0+2, j0+n
         gam(j) = amat(j-1,3)/bet
         bet = amat(j,2) - amat(j,1)*gam(j)
         if ( bet .eq. 0d0 ) then
            write(i2,*) 'dirsol: zero pivot found in row ',j-j0
            write(i2,*) '        take a look at your matrix'
            stop
         endif
         u(j) = ( b(j) - amat(j,1) * u(j-1) ) / bet
   10 continue
      do 20 j = j0+n-1, j0+1, -1
         u(j) = u(j) - gam(j+1)*u(j+1)
   20 continue
c     end of dirsol
      end
      subroutine err(i1,i2,i)
c***********************************************************************
c
c   Write error message for mglab program and stop the program
c
c***********************************************************************
      integer i

      write( i2, '(a)' ) '********** ERROR ***********'
      if (i .eq. 1) then
         write( i2, '(a)' )
     $          ' levels too large: increase dimension of np to '
         write( i2, '(a)' )' levels+1'
         goto 10
      else if (i .eq. 3) then
         write( i2, '(a)' )' ib(4) out of range '
         goto 10
      else if (i .eq. 4) then
         write( i2, '(a)' )' dimension of amat too small'
         goto 10
      else if (i .eq. 5) then
         write( i2, '(a)' )' igal out of range'
         goto 10
      endif
  10  write(*,*)'******************************************'
      write(*,*)' !!! error  !!!! results are in discr.res'
      write(*,*)'******************************************'
      write( i2, '(a)' ) '***** PROGRAM STOPPED ******'
      close(i2)
      stop
c     end of err
      end
      subroutine errmg (i2,i)
c***********************************************************************
c
c   Write error message for mglab program and stop the program
c
c***********************************************************************
      implicit none
      integer i2,i
      if (i.eq.1) then
          write(i2,*) ' igamma out of range in subroutine smooth'
          write(*,*)' aborted - error in smoothing'
          stop
       else
          write(i2,*) ' i out of range in subroutine errmg'
          write(*,*)' aborted - error in errmg'
       endif
c      end of errmg
       end
      subroutine galerk( ib, idim, iout, i2, levels, np, amat, scpmtx )
c **************************************************************************
c
c  computation of coarse grid matrices by Galerkin approximation
c
c **************************************************************************
c
c  INPUT / OUTPUT PARAMETERS:
c
      implicit none
      integer ib(10), idim, iout, i2, levels, np(11), scpmtx

      double precision amat(idim,-1:1)
c
c   ib      i  ib(6) chooses type of prolongation - see subroutine prol:
c               ic = ib(6)
c   ib(6) = 1:  vertex-centered linear interpolation: equation (5.3.1) of 
c               the book: (1/2)*[1 2 1]
c   ib(6) = 2:  cell-centered piecewise constant interpolation:
c               equation (5.3.5) of the book: [1 1]
c   ib(6) = 3:  cell-centered linear interpolation with Dirichlet boundary
c               modification:  equation (5.3.6) of the book with w=0
c               in equation (5.3.24) at boundaries: (1/4)*[w 2+w 2+e e]
c   ib(6) = 4:  cell-centered linear interpolation with Neumann boundary
c               modification:  equation (5.3.6) of the book with stencil
c               [P*] = (1/4)*[ 0  4  3  1 ] at boundary x=0 and similar at x=1
c   ib(6) = 5:  vertex-centered operator dependent prolongation
c               equations (5.4.2), (5.4.3) of the book: NOT IMPLEMENTED
c
c               ib(7) chooses type of restriction for residual - see
c               subroutine restri: ic = ib(7)
c   ib(7) = 1:  vertex-centered restriction according to the one-
c               dimensional version of equation (5.3.19) of the book
c               stencil = 0.5 * [1 2 1]
c               boundary modification type Dirichlet: 
c               at x=0: stencil = 0.5 * [0 2 1];
c               at x=1: stencil = 0.5 * [1 2 0]
c   ib(7) = 2:  vertex-centered restriction according to the one-
c               dimensional version of equation (5.3.19) of the book
c               stencil = 0.5 * [1 2 1]
c               boundary modification type Neumann:
c               at x=0: stencil = 0.5 * [0 3 1];
c               at x=1: stencil = 0.5 * [1 3 0]
c   ib(7) = 3:  cell-centered restriction according to equation (5.3.23)
c               of the book
c               stencil = [1  1]
c   ib(7) = 4:  cell-centered restriction according to equation (5.3.24)
c               of the book
c               stencil = (1/4)*[1 3 3 1]
c               boundary modification type Dirichlet: 
c               at x=0: stencil = (1/4)*[0 3 3 1];
c               at x=1: stencil = (1/4)*[1 3 3 0]
c   ib(7) = 5:  cell-centered restriction according to equation (5.3.24)
c               of the book
c               stencil = (1/4)*[1 3 3 1]
c               boundary modification type Neumann:
c               at x=0: stencil = (1/4)*[0 4 3 1];
c               at x=1: stencil = (1/4)*[1 3 4 0]
c   ib(7) = 6:  vertex-centered operator-dependent restriction according
c               to equation (5.4.22) of the book
c               stencil = [a1 a2 a3]
c               boundary modification type Neumann: 
c               at x=0: a1 = 0, a2:= 2*a2-a3
c               at x=1: a3=0, a2:=2*a2-a1: NOT IMPLEMENTED
c   ib(7) = 7:  vertex-centered operator-dependent restriction according
c               to equation (5.4.22) of the book
c               stencil = [a1 a2 a3]
c               boundary modification type Dirichlet: 
c               at x=0: a1 = 0; at x=1 : a3 = 0: NOT IMPLEMENTED
c  idim   *   parameter that passes the first dimension of amat
c  iout   i   governs amount of output; iout larger -> more output
c  i2     i   output device number
c  levels i   number of grids in multigrid method; < 16
c  np     i   array of pointers to start of grids and to 1+end of finest grid
c             np(1)=1     : start of coarsest grid
c             np(levels)  : start of finest grid
c             np(levels+1): end of finest grid + 1
c  amat   o   matrix (discretization of differential equation) on coarse grids
c  scpmtx i/o -scpmtx and scpmtx are the assumed lower and upper bound of the 
c             structure of the matrix A. The matrix A on the finest grid is
c             assumed to have three-point stencil, which implies scpmtx=1. It
c             may be changed to 2 if enlargement of stencils of A occurs.
c
c ***************************************************************************
c
c  LOCAL QUANTITIES:
c
      integer ca(-1:1), cab(-1:1), cp(-2:1), cr(-2:1),
     $     i, idx1, idx2, idx3, idx4, ip, k, level, m, n, q
      double precision p(-2:1,1:3), r(-2:1,1:3)
c ***************************************************************************
c
c  ca         characteristic function of fine grid operator A _
c  cab        characteristic function of coarse grid operator A
c  cp         characteristic function of prolongation operator P
c  cr         characteristic function of restriction R
c  i          do-loop variable over the coarse grid points
c  idx1,idx2, scratch variables
c  idx3,idx4 
c  ip         ip=1, if vertex-centered coarsening is used; 
c             ip=0, if cell-centered coarsening is used. 
c             For vertex-centered grid, the starting number of grid points
c             is zero; the final number is the number of grid points
c             minus 1. But for cell-centered grid, the starting number 
c             of grid points is one; the final number is the number of
c             grid points.
c  k          do-loop variable over the structure of matrix A on the 
c             fine grid
c  level      do-loop variable over grid levels
c  m          do-loop variable over the structure of restriction operator
c  n          do-loop variable over the structure of matrix A on the
c             coarse grid
c  q          variable whose range is within the scope of the structure of 
c             the transpose of prolongation operator
c  p          array (-2:1,1:3), stores the elements of prolongation operator;
c             the operator for the interior points are stored in p(*,2); that 
c             for the left boundary in p(*,1) and that for the right boundary 
c             in  p(*,3)
c  r          array (-2:1,1:3), stores the elements of restriction operator;
c             the storage arrangement is similar to p
c
c****************************************************************************
c
c  FUNCTIONS CALLED: none
c
c****************************************************************************
c
c  SUBROUTINES CALLED:
c
c  chrfun: defines the characteristic functions of A, A(bar), P and R, resp.,
c          and computes the lower and upper bound of the structure of A on
c          coarser grids according to the choice of P and R
c  defrp1: defines operator-independent prolongation and restriction operator 
c  defrp2: defines operator-dependent prolongation and restriction operator
c  ibchck: checks whether the choices of ib(6) and ib(7) are consistent, and
c          computes the flag ip
c
c****************************************************************************
c
c  METHOD:
c
c  Galerkin coarse grid approximation according to algorithm CALRAP
c    in section 6.2 of the book
c
c****************************************************************************
c
c Check whether combination of ib(6) and ib(7) is correct and compute ip
c
      call ibchck( i2, ib, ip )
c
c Define operator-independent prolongation and restriction operator
c
      call defrp1( i2, ib, p, r )
c
c Define the characteristic functions of A, A(bar), P and R, and compute the
c possible lower and upper bound of the structure of A on the coaser grid.
c Here, cp and cr are independent of grid level, but cab is not (stencils of A
c could be enlarged). In our case, the bound does not exceed 2.
c  
      call chrfun( i2, ib, ca, cab, cp, cr, scpmtx )
      do 100 level = levels-1,1,-1
c
c Initialize the matrix on the coarse grid to zero
c
        do 5 idx1 = np(level),np(level+1)-1
          do 10 idx2 = -1,1
            amat(idx1,idx2) = 0.d0
   10     continue
    5   continue
c
c CALRAP procedure:
c compute the matrix on the coarse grid
c loop over the structure of A on the coarse grid
c
        do 15 n = -1,1
          if ( cab(n) .eq. 1 ) then
c
c loop over the structure of restriction operator
c
            do 20 m = -2,1
              if ( cr(m) .eq. 1 ) then
c
c loop over the structure of A on the fine grid
c
                do 25 k = -1,1
                  if ( ca(k) .eq. 1 ) then
                    q = m+k-2*n
                    if ( q .ge. -2 .and. q .le. 1 ) then
c
c q belongs to the structure of the transpose of prolongation operator
c
                      if ( cp(q) .eq. 1 ) then
c
c Now idx1 and idx2 are the number of coarse gird and fine grid points, 
c respectively
c
                        idx1 = np(level+1)-np(level)
                        idx2 = np(level+2)-np(level+1)
c
c treat vertex-centered case (grid points denoted by *)
c             numbering on fine grid        *  *  *  *  *  *  *
c                                           0  1  2  3  4  5  6
c             numbering on coarse grid      *     *     *     *
c                                           0     1     2     3
c
                        if ( ip .eq. 1 ) then
c
c loop over the interior coarse grid points
c i is local coarse grid coordinate; see picture above
c
                          do 30 i=1,np(level+1)-np(level)-2
c
c  i+n is a coarse grid point and 2i+m is a fine grid point
c
                            if ( (i+n.ge.0 .and. i+n.le.idx1-1) .and. 
     $                          (2*i+m.ge.0 .and. 2*i+m.le.idx2-1)) then
                              idx3 = np(level)+i
                              idx4 = np(level+1)+2*i+m
c
c if p or r is operator-dependent, call defrp2 (not implemented)
c
c                             call defrp2(i, ib, im, idim, level, ip, 
c                                         np, scpmtx, amat, solut, p, r)
c
                              amat(idx3,n) = amat(idx3,n) + 
     $                                       r(m,2)*amat(idx4,k)*p(q,2)
                            end if
 30                       continue
c
c left boundary point
c
                          i = 0
                          if ( (i+n.ge.0.and.i+n.le.idx1-1) .and.
     $                       (2*i+m.ge.0.and.2*i+m.le.idx2-1) ) then
                            idx3 = np(level)
                            idx4 = np(level+1)+2*i+m
c
c if p or r is operator-dependent, call defrp2 (not implemented)
c
c                           call defrp2( i, ib, im, idim, level, ip, np,
c                                        scpmtx, amat, solut, p, r )
c
c                           operator p for the boundary point is used
c
                            if ( i+n .eq. 0 ) then
                              amat(idx3,n) = amat(idx3,n)+
     $                                       r(m,1)*amat(idx4,k)*p(q,1)
c
c operator p for the interior point is used
c
                            else
                              amat(idx3,n) = amat(idx3,n) +
     $                                       r(m,1)*amat(idx4,k)*p(q,2)
                            end if
                          end if
c
c right boundary point
c
                          i = idx1-1
                          if ( (i+n.ge.0.and.i+n.le.idx1-1) .and.
     $                       (2*i+m.ge.0.and.2*i+m.le.idx2-1)) then
                            idx3 = np(level+1)-1
                            idx4 = np(level+1)+2*i+m
c
c if p or r is operator-dependent, call defrp2 (not implemented)
c
c                           call defrp2( i, ib, im, idim, level, ip,
c     $                                 np, scpmtx, amat, solut, p, r )
c
c operator p for the boundary point is used
c
                            if ( i+n .eq. idx1-1 ) then
                              amat(idx3,n) = amat(idx3,n) +
     $                                       r(m,3)*amat(idx4,k)*p(q,3)
c
c operator p for the interior point is used
c
                            else
                              amat(idx3,n) = amat(idx3,n) +
     $                                       r(m,3)*amat(idx4,k)*p(q,2)
                            end if
                          end if
c
c treat cell-centered case (grid points denoted by *)
c               numbering on fine grid     | * | * | * | * | * | * |
c                                            1   2   3   4   5   6
c               numbering on coarse grid   |   *   |   *   |   *   |
c                                              1       2       3
c
                        else 
c
c loop over the interior coarse grid points. i is local coarse grid 
c coordinate; see picture above
c
                          do 35 i = 2,np(level+1)-np(level)-1
c
c i+n is a coarse grid point and 2i+m is a fine grid point
c
                            if ( (i+n.ge.1.and.i+n.le.idx1) .and.
     $                         (2*i+m.ge.1.and.2*i+m.le.idx2) ) then
                              idx3 = np(level)+i-1
                              idx4 = np(level+1)+2*i+m-1
c
c if p or r is operator-dependent, call defrp2 (not implemented)
c
c                             call defrp2( i, ib, im, idim, level, ip,
c    $                                  np, scpmtx, amat, solut, p, r )
c
c operator p for left boundary point is used
c
                              if ( i+n .eq. 1 ) then
                                amat(idx3,n) = amat(idx3,n) +
     $                                     r(m,2)*amat(idx4,k)*p(q,1)
c
c operator p for right boundary point is used
c
                              else if (i+n .eq. idx1 ) then
                                amat(idx3,n) = amat(idx3,n) +
     $                                      r(m,2)*amat(idx4,k)*p(q,3)
c
c operator p for interior points
c
                              else
                                amat(idx3,n) = amat(idx3,n) +
     $                                      r(m,2)*amat(idx4,k)*p(q,2)
                              end if
                            end if
 35                       continue
c
c left boundary point
c
                          i = 1
                          if ( (i+n.ge.1.and.i+n.le.idx1) .and.
     $                       (2*i+m.ge.1.and.2*i+m.le.idx2) ) then
                            idx3 = np(level)
                            idx4 = np(level+1)+2*i+m-1
c
c if p or r is operator-dependent, call defrp2 (not implemented)
c
c                           call defrp2( i, ib, im, idim, level, ip,
c     $                                 np, scpmtx, amat, solut, p, r )
c
c operator p for the boundary point is used
c
                            if ( i+n .eq. 1 ) then
                              amat(idx3,n) = amat(idx3,n) +
     $                                   r(m,1)*amat(idx4,k)*p(q,1)
c
c operator p for the interior point is used
c
                            else
                              amat(idx3,n) = amat(idx3,n) +
     $                                    r(m,1)*amat(idx4,k)*p(q,2)
                            end if
                          end if
c
c right boundary point
c
                          i = idx1
                          if ( (i+n.ge.1.and.i+n.le.idx1) .and.
     $                       (2*i+m.ge.1.and.2*i+m.le.idx2) ) then
                            idx3 = np(level+1)-1
                            idx4 = np(level+1)+2*i+m-1
c
c if p or r is operator-dependent, call defrp2 (not implemented)
c
c                           call defrp2( i, ib, im, idim, level, ip, np,
c     $                                  scpmtx, amat, solut, p, r )
c
c operator p for the boundary point is used
c
                            if ( i+n.eq.idx1 ) then
                              amat(idx3,n) = amat(idx3,n) +
     $                                    r(m,3)*amat(idx4,k)*p(q,3)
c
c operator p for the interior point is used
c
                            else
                              amat(idx3,n) = amat(idx3,n) +
     $                                   r(m,3)*amat(idx4,k)*p(q,2)
                            end if
                          end if
                        end if
                      end if
                    end if
                  end if
   25           continue
              end if
   20       continue
          end if
   15   continue
c
c Output:
c write the matrix for grid level=level
c
        write(i2,*) 'galerk: matrix at grid=',level
        do 40 i = np(level),np(level+1)-1
          write(i2,'(i3,3e11.3)') 
     $          i-np(level)+1,(amat(i,idx1),idx1 = -scpmtx,scpmtx)
   40 continue
c
  100 continue
c
c end of subroutine galerkin
c
      return
      end
      subroutine gener( i, ib, idim, icase, np, amat, aa, bb, cc, 
     $                  f0, f1, scal, rhs, ss )
c***********************************************************************
c
c  Generate matrix amat (tri-diagonal) on a given grid depending on
c  ib(4)
c
c***********************************************************************
c
c               INPUT / OUTPUT PARAMETERS
c
      implicit none
      integer i, ib(13), idim, icase, np(11)
      double precision aa, amat(1:idim,-1:1), bb, cc, f0, f1,
     $ rhs(1:idim), scal, ss
c
c  i       i  grid number
c  ib      i  see subroutine matrhs
c  idim    i  see subroutine matrhs
c  icase   i  see subroutine matrhs
c  np      i  see subroutine matrhs
c  a          diffusion coefficient (function)
c  aa      i  parameter for diffusion coefficient
c  amat    o  see subroutine matrhs
c  b          velocity coefficient (function)
c  bb      i  parameter for velocity coefficient
c  c          coefficient (function)
c  cc      i  parameter for c coefficient
c  f0      i  see subroutine matrhs
c  f1      i  see subroutine matrhs
c  rhs     o  see subroutine matrhs
c  s          right-hand-side of differential equation (function)
c  scal    i  scaling factor for dirichlet condition in vertex-centered case
c  ss      i  parameter for function s
c
c****************************************************************************
c
c            LOCAL QUANTITIES
c
      integer j
      double precision  h, a, al, ar, b, bl, br, c, fm, fp, om, omdif,
     $ omega, pe, s, w, x
c
c  j          grid point index
c  h          mesh size
c  a          diffusion coefficient (function)
c  al         diffusion coefficient
c  ar         diffusion coefficient
c  b          velocity coefficient (function)
c  bl         velocity coeficient
c  br         velocity coeficient
c  c          coefficient (function)
c  fm         coefficient in flux function: flux = fp*u(j+1) + fm*u(j)
c  fp         see fm
c  om         switching variable between upwind, central, hybrid
c  omdif      switching variable between upwind, central, hybrid
c  omega      switching variable between upwind, central, hybrid (function)
c  pe         mesh peclet number
c  s          right-hand-side of differential equation (function)
c  w          harmonic average of al, ar
c  x          x-coordinate
c
c ***************************************************************************
c
c            FUNCTIONS
c
c  a(icase,aa,x)  see subroutine matrhs
c  b(icase,bb,x)  see subroutine matrhs
c  c(icase,cc,x)  see subroutine matrhs
c  s(icase,ss,x)  see subroutine matrhs
c  omega(pe,type) switching function for hybrid scheme
c
c **************************************************************************
c
c                METHOD
c
c  Finite volume discretization of 1-dimensional second order boundary
c    value problem.
c    See chapter 3 and section 9.7 of the book
c  Upwind, central or hybrid discretization of convection (first order)
c    term, depending on ib(3).
c  Cell-centered or vertex-centered discretization, depending on ib(4)
c
c  Equation: -(ay')'+(by)'+cy = s
c
c  Boundary conditions:
c     Dirichlet: y(0) = f0  or  Neumann: -ay'(0) = f0
c     Dirichlet: y(1) = f1  or  Neumann: +ay'(1) = f1
c     The gridfunction values at the boundary points are not eliminated,
c     as would be possible in the vertex-centered case. Instead,
c     the corresponding equations are multiplied with scal. If scal >> 1
c     then the matrix is made symmetric by adding an artificial element.
c
c  The diffusion coefficient a(x) is allowed to be discontinuous at x = j*h
c    with h the stepsize. The user may put discontinuities elsewhere (and
c    will often have to on some coarse grids if igal = 0) at his own risk.
c
c ***********************************************************************
c
      if ( ib(4) .eq. 1 ) then
c
c=======================================================================
c  Vertex-centered
c=======================================================================
         h = 1d0/(dble( np(i+1) - np(i) ) - 1d0 )
c
         do 10 j = np(i), np(i+1)-1
               x = ( j - np(i) )*h
c
c  visit vertices
c
            amat(j,0) =  c( icase, cc, x ) * h
            rhs(j) =  s( icase, cc, x ) * h
  10     continue
c
c  the boundary finite volumes have half size
c
         amat(np(i),0) = 0.5d0*amat(np(i),0)
         amat(np(i+1)-1,0) = 0.5d0*amat(np(i+1)-1,0)
         rhs(np(i)) = 0.5d0*rhs(np(i))
         rhs(np(i+1)-1) = 0.5d0*rhs(np(i+1)-1)
c
c  visit interior cell interfaces
c
         do 11 j = np(i), np(i+1)-2
            x = (j - np(i) + 0.5)*h
            w = a(icase,aa,x)
            pe = h*b(icase,bb,x)/w
            om = omega (pe, ib(3))
            if ( ib(3).eq.2 .or. ib(3).eq.4 ) then
c
c  diffusion term not switched off
c
              omdif = 0d0
            else
              omdif = om
            endif
            bl = b(icase,bb,x-0.5d0*h)
            br = b(icase,bb,x+0.5d0*h)
c
c  flux = fp*u(j+1) + fm*u(j)
c
            fp =  -w*(1d0-omdif)/h + 0.5d0*((1d0-om)*br+om*(br-abs(br)))
            fm =   w*(1d0-omdif)/h + 0.5d0*((1d0-om)*bl+om*(bl+abs(bl)))
            amat(j,1) = fp
            amat(j+1,-1) =- fm
            amat(j,0) = amat(j,0)+ fm
            amat(j+1,0) = amat(j+1,0) - fp
  11     continue
c
c x = 0: boundary condition
c
         j = np(i)
         amat(j,-1) = 0d0
         if ( ib(1) .eq. 1 ) then
c
c Scale matrix with scal at Dirichlet boundary
c
            amat(j, 0) = scal*a(icase,aa,0d0)/h
            amat(j, 1) = 0d0
            rhs(j) = scal*a(icase,aa,0d0)*f0/h
c
c make matrix symmetric if scal large
c
            if ( scal .ge. 1d6) then
              amat(j,1) = amat(j+1,-1)
            endif
         else
c
c Neumann condition
c
            amat(j,0) = amat(j,0) - b(icase,bb,0d0)
            rhs(j) = rhs(j) + f0
         endif
c
c x = 1: boundary condition
c
         j = np(i+1) - 1
         amat(j,1) = 0d0
         if ( ib(2) .eq. 1 ) then
c
c Scale matrix with scal at Dirichlet boundary
c
            amat(j, 0) = scal*a(icase,aa,1d0)/h
            amat(j, -1) = 0d0
            rhs(j) = scal*a(icase,aa,1d0)* f1/h
c
c make matrix symmetric if scal large
c
            if ( scal .ge. 1d6) then
              amat(j, -1) = amat(j-1,1)
            endif
         else
c
c Neumann condition
c
            amat(j,0) = amat(j,0) + b(icase,bb,1d0)
            rhs(j) = rhs(j) + f1
         endif
      else if ( ib(4) .eq. 2 ) then
c
c=======================================================================
c  Cell-centered
c=======================================================================
c
         h = 1d0/dble( np(i+1) - np(i) )
c
c visit cell centers
c
         do 20 j = np(i), np(i+1)-1
            x = (j - np(i) + 0.5)*h
            amat(j,0) =  c( icase, cc, x ) * h
            rhs(j) =  s( icase, cc, x ) * h
  20     continue
c
c visit interior cell interfaces
c
         do 21 j = np(i), np(i+1)-2
c
c  consider right face of cell j
c
            x = (j - np(i) + 0.5)*h
            al = a(icase,aa,x)
            ar = a(icase,aa,x+h)
            w = 2*al*ar/(al+ar)
            pe = h*b(icase,bb,x+h/2d0)/w
            om = omega (pe, ib(3))
            if ( ib(3).eq.2 .or. ib(3).eq.4 ) then
c
c  diffusion term not switched off
c
              omdif = 0d0
            else
              omdif = om
            endif
            bl = b(icase,bb,x)
            br = b(icase,bb,x+h)
c
c  flux = fp*u(j+1) + fm*u(j)
c
            fp =  -w*(1d0-omdif)/h+ 0.5d0*((1d0-om)*br+om*(br-abs(br)))
            fm =  w*(1d0-omdif)/h+ 0.5d0*((1d0-om)*bl+om*(bl+abs(bl)))
            amat(j,1) = fp
            amat(j+1,-1) =- fm
            amat(j,0) = amat(j,0)+ fm
            amat(j+1,0) = amat(j+1,0) - fp
  21     continue
c
c x = 0: boundary condition
c
         j = np(i)
         amat(j,-1) = 0d0
         w = a(icase,aa,0.5d0*h)
         bl = b(icase,bb,0d0)
         br = b(icase,bb,0.5d0*h)
         pe = h*bl/w
         om = omega (pe, ib(3))
         if ( ib(3).eq.2 .or. ib(3).eq.4) then
c
c  diffusion term not switched off
c
              omdif = 0d0
            else
              omdif = om
            endif
         if ( ib(1) .eq. 1 ) then
c
c Dirichlet condition
c
c  flux = fm*f0 + fp*u(j)
c
            fm = 2d0*w*(1d0-omdif)/h+ (1d0-om)*bl+0.5d0*om*(bl+abs(bl))
            fp = -2d0*w*(1d0-omdif)/h+ 0.5d0*om*(br-abs(br))
         else
c
c Neumann condition
c
c  flux = fm*f0 + fp*u(j)
c
            fm =1d0-omdif+(1d0-om)*0.5d0*pe+om*0.25d0*h*(bl+abs(bl))/w
            fp = (1d0-om)*bl + om*0.5d0*(bl+abs(bl)+br-abs(br))
         endif
         amat(j,0) = amat(j,0) - fp
         rhs(j) = rhs(j) + fm*f0
c
c x = 1: boundary condition
c
         j = np(i+1)-1
         amat(j,1) = 0d0
         w = a(icase,aa,1d0-0.5d0*h)
         bl = b(icase,bb,1d0-0.5d0*h)
         br = b(icase,bb,1d0)
         pe = h*br/w
         om = omega (pe, ib(3))
         if ( ib(3).eq.2 .or. ib(3).eq.4 ) then
c
c  diffusion term not switched off
c
           omdif = 0d0
         else
           omdif = om
         endif
         if ( ib(2) .eq. 1 ) then
c
c Dirichlet condition
c
c  flux = fp*f1 + fm*u(j)
c
            fm = 2d0*w*(1d0-omdif)/h+0.5d0*om*(bl+abs(bl))
            fp = -2d0*w*(1d0-omdif)/h+ 0.5d0*om*(br-abs(br))+(1d0-om)*br
         else
c
c Neumann condition
c
            fp =-1d0+omdif+(1d0-om)*0.5d0*pe+om*0.25d0*h*(br+abs(br))/w
            fm = (1d0-om)*br + om*0.5d0*(bl+abs(bl)+br-abs(br))
         endif
            amat(j,0) = amat(j,0) + fm
            rhs(j) = rhs(j) - fp*f1
      endif
c     end of gener
      end
      subroutine ibchck( i2, ib, ip )
c*****************************************************************************
c
c  This subroutine checks whether combination of ib(6) and ib(7) is correct
c  and compute ip
c
c*****************************************************************************
c
c  Input/Output parameters
c
      implicit none
      integer i2,ib(10),ip
c*****************************************************************************
c
c  i2     i   logical output device number
c  ib     i   for more information, see subroutine galerk
c  ip     o   flag for type of grid used: ip=1, vertex-centered grid is used;
c             ip=0, cell-centered grid is used
c 
c*****************************************************************************
c
c  Check the choice of prolongation
c
      if ( ib(6) .eq. 1 .or. ib(6) .eq. 5 ) then
         ip = 1
         if ( ib(7) .gt. 2 ) then
            write( i2, * ) 
     $      'ibchck: vertex-centered P matches cell-centered R'
            stop
         end if
      else if ( ib(6) .eq. 2 .or. ib(6) .eq. 3 .or. ib(6) .eq. 4 ) then
         ip = 0
         if ( ib(7) .eq. 1 .or. ib(7) .eq. 2
     $          .or. ib(7) .eq.6 .or. ib(7) .eq. 7 ) then
            write(i2,*)
     $      'ibchck: cell-centered P matches vertex-centered R'
            stop
         end if
      end if
c
c Usually, enlargement of stencils is not expected
c
      if ( ib(6) .eq. 3 .or. ib(6) .eq. 4 ) then
         if ( ib(7) .ne. 3 ) write( i2, * )
     $      'ibchck: stencil enlarged under RAP, you really want so?'
      end if
c
c end of subroutine ibchck
c
      return
      end
      subroutine info(icase,ib)
c***********************************************************************
c
c   User provided subroutine to print information concerning mglab
c
c***********************************************************************
      integer icase, ib(*)
      if (icase.eq.1) then
      else
      endif
c     end of info
      end
      subroutine inout( i1, i2 )
c***********************************************************************
c
c   Open the mglab input and output files
c
c***********************************************************************
      implicit none
      integer i1, i2

      open( i1, file='mglab.dat' )
      open( i2, file='mglab.res' )

      end
      subroutine input( i1, i2, ib, icase, igal, iout, ism, levels, nc,
     $                  nmg, ns, aa, bb, cc, delta, f0, f1, om, spar,
     $                  ss, tol, cycle )
c
c ***************************************************************************
c
c   read and write the user provided input for mglab
c
c ***************************************************************************

      implicit none
      integer ib(13), icase, igal, iout(4), ism, i1, i2, levels, nc, 
     $        nmg, ns(2)
      double precision aa, bb, cc, delta, f0, f1, om, spar, ss, tol
      character cycle
c ***************************************************************************
c
c       Description of parameters in main program
c       This subroutine reads and prints input
c
c ***************************************************************************
      integer i
      character*32 string

      read( i1, '(a)' ) string
      read( i1, * ) ( ib(i), i=1,13 )
      read( i1, '(a)' ) string
      read( i1, * ) icase, igal, ( iout(i), i=1,4 ), ism, levels
      read( i1, '(a)' ) string
      read( i1, * ) nc, nmg, ns(1), ns(2)
      read( i1, '(a)' ) string
      read( i1, * ) aa, bb, cc, ss, f0, f1
      read( i1, '(a)' ) string
      read( i1, * ) om, spar, delta, tol
      read( i1, '(a)' ) cycle
      write( i2, '(a)' ) 
     $ '********** TUTORIAL MULTIGRID LABORATORY ********************'
      write( i2, '(a)' ) ' '
      write( 12, '(a)' )
     $ '   ib1  ib2  ib3  ib4  ib5  ib6  ib7  ib8  ib9 ib10 ib11 ib12  i
     $b13'
      write( i2, '(13i5)' ) (ib(i),i=1,13)
      write( i2, '(a)' ) 'icase igal iout              ism levels   nc'
      write( i2, '(9i5)' ) icase, igal, ( iout(i), i=1,4),ism,levels,nc
      write( i2, '(a)' ) 'nmg  ns(1) ns(2)'
      write( i2, '(3i5)' ) nmg, ns(1), ns(2)
      write( i2, '(a)' ) 
     $       '   aa        bb        cc        ss        f0       f1'
      write( i2, '(6E10.3)' ) aa, bb, cc, ss, f0, f1
      write( i2, '(a)' ) '   om        spar      delta     tol   '
      write( i2, '(4E10.3)' ) om, spar, delta, tol
      write( i2, '(a,a)' ) '  cycle = ', cycle
      if( levels .gt. 10 ) then
         call err( i1, i2, 1 )
      endif
c     end of input
      end
      subroutine matout( i2, idim, k, np, a )
c **************************************************************************
c
c     prints matrix a  on grid k
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, i2, k, np(11)
      double precision a(idim,3)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i, n, m
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c                none
c ***************************************************************************
c
c                 I/O
c    output goes to logical unit number i2
c
c ***************************************************************************
      write( i2, * ) ' matrix at grid', k
      n = np(k+1)-np(k)
      m = n/4
      do 10 i= np(k),np(k+1)-1
         write( i2, '(i3,3e11.3)' ) i-np(k)+1, a(i,1), a(i,2), a(i,3)
  10  continue
c     end of matout
      end
      subroutine matrhs(ib, icase, idim, igal, iout, i2, levels, nc, 
     $                  np, aa, amat, bb, cc, f0, f1, rhs, ss )
c **************************************************************************
c
c  Discretization of 1-dimensional boundary value problem on finest or on
c   all grids for application of multigrid methods
c   domain: 0 < x < 1
c   -d/dx(adu/dx) + d/dx(bu) + cu = s
c
c **************************************************************************
c
c                  INPUT / OUTPUT PARAMETERS
c
      implicit none
      integer ib(10), icase, idim, igal, iout, i1, i2, levels, nc, 
     $        np(11)
      double precision aa, amat(idim,3), bb, cc, f0, f1, rhs(idim), ss
c
c  aa      i  governs the diffusion coefficient in the diff. equation
c  amat    o  discretization matrix
c  bb      i  governs the velocity coefficient in the diff. equation
c  cc      i  governs the third coefficient in the diff. equation
c  f0      i  value for dirichlet or neumann boundary condition at x = 0
c  f1      i  value for dirichlet or neumann boundary condition at x = 1
c  s       o  right-hand-side
c  a       i  diffusion constant (function)
c  ss      i  governs the right-hand-side in the diff. equation
c  ib      i  governs type of boundary conditions and discretzation of
c             convection term and choice between vertex-centered and
c             cell-centered discretization
c             ib(1) = 1: Dirichlet at x = 0
c             ib(1) = 2: Neumann   at x = 0
c             ib(2) = 1: Dirichlet at x = 1
c             ib(2) = 2: Neumann   at x = 1
c             ib(3) = 1: central
c             ib(3) = 2: upwind
c             ib(3) = 3: hybrid with diffusion term deleted when switched to
c                        upwind
c             ib(3) = 4: hybrid with diffusion term maintained when switched
c                        to upwind
c             ib(4) = 1: vertex-centered
c             ib(4) = 2: cell-centered
c  icase   i  identification number given by user to his problem
c  idim    *  parameter that passes the first dimension of amat to gener
c  igal    i  igal = 0: matrices are made on all grids
c             igal = 1: matrix is made on finest grid only, and coarse
c                       grid matrices are initialized to 0d0
c  iout   i   governs amount of output; iout larger -> more output
c  i2     i   output device number
c  levels i   number of gids in multigrid method; < 16
c  nc     i   number of grid points on coarsest grid
c  np     i   array of pointers to start of grids and to 1+end of finest grid
c             np(1)=1     : start of coarsest grid
c             np(levels)  : start of finest grid
c             np(levels+1): end of finest grid + 1
c  amat   o   matrix (discretization of differential equation) on finest
c             grid  and initialized to 0.d0 on the other grids, or matrices
c             on all grids, depending on igal
c  f0     i   boundary value at x = 0
c  f1     i   boundary value at x = 1
c  rhs    o   right-hand-side on finest grid
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i, j
      double precision scal
c
c  i,j     general loop variables or scratch variables
c  scal    see subroutine gener
c
c ***************************************************************************
c
c                 I/O
c
c  output file: discr.res  device number = i2   free format
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c  gener(i,ib,idim,icase,np,amat,aa,bb,cc,f0,f1)  generates matrix on grid i
c   
c  info(icase,ib)  user-provided subroutine that prints information about
c                   his case; body may be empty
c
c  matout(i2,idim,levels,np,amat)  prints amat for inspection
c
c  err(i)         gives error messages
c
c ***************************************************************************
c
c                METHOD
c
c  See subroutine gener
c
c ************************************************************************
      scal = 2d0
      if ( igal .eq. 1 ) then
c
c Galerkin: generate finest grid matrix
c
         call gener( levels, ib, idim, icase, np, amat, aa, bb, cc, 
     $               f0, f1, scal, rhs, ss )
c
c Initialize amat for coarse grids
c
         do 11 j = 1, 3
            do 10 i = 1, np(levels) - 1
               amat(i,j) = 0d0
   10       continue
   11    continue
      else if ( igal .eq. 0 ) then
c
c Generate grids for all levels
c
         do 12 i = 1, levels
            call gener( i, ib, idim, icase, np, amat, aa, bb, cc, 
     $                   f0, f1, scal, rhs, ss )
   12    continue
      else
c
c Wrong value of igal
c
         call err( i1, i2, 5 )
      endif
      if ( iout .gt. 2 ) then
         do 13 i = 1, levels
            call matout( i2, idim, i, np, amat )
  13     continue
      endif
      if ( iout .gt. 2 ) then
         write(i2,*)'Right hand side'
         call vecout( i2, idim, levels, np, rhs )
      endif
c     end of matrhs
      end
      subroutine mg( amat, b, cycle, delta, histry, ib, idim, ihis,
     $               iout, it, ism, i2, levels, nmg, np, ns, om, r, s, 
     $               tol, u, ue, ut, v, wu )
c
c **************************************************************************
c
c  Nonlinear multigrid algorithm as described in section 8.7 of the book.
c  The problem to be solved is assumed to be linear.
c
c **************************************************************************
c
c                  INPUT / OUTPUT PARAMETERS
c
      implicit none
      integer histry(2,1000), ib(10), idim, iout(4), ism, it, i2,
     $        levels, np(11), nmg, ns(2)
      double precision amat(idim,3), b(idim), delta, om, r(idim), s(10),
     $                 tol, u(idim), ue(idim), ut(idim), v(idim), wu(10)
      character cycle
c
c  amat     i   matrices on all grids
c  b        i   right-hand-sides on grid(levels)
c               Remark. The right-hand-sides on the coarse grids, called
c                     f in subroutine MG in section 8.7 in the book, are
c                     also stored in b
c  cycle    i   governs type of multigrid schedule
c                 cycle = 'A' : adaptive schedule
c                 cycle = 'F' : F-cycle
c                 cycle = 'V' : V-cycle
c                 cycle = 'W' : W-cycle
c  delta    i   used in setting tolerance eps in adaptive cycle according
c                 to equation (8.3.1) in the book
c  histry  i,o gives sequence of grids visited and number of smoothings
c                carried out during each visit
c                histry(1,i) : grid number
c                histry(2,i) : number of smoothings carried out at
c                  visit to grid(histry(1,i))
c  ib       i   ib(5) governs updating of ut -- see information on ut
c               ib(6) chooses type of prolongation
c               ib(7) chooses type of restriction for residual
c               ib(8) chooses type of restriction for ut
c  idim     i   size of matrix and grid vectors
c  iout     i   governs output
c                 iout(2) governs diagnostic output and output in
c                    subroutine smooth
c                 iout(3) governs performance output
c  ism      i   chooses smoothing method
c  it       o   gives current multigrid iteration number
c  i2       i   logical unit number
c  levels   i   total number of grids
c                 1 : coarsest grid;  levels: finest grid
c  nmg      i   maximum number of iterations
c  np       i   array of pointers to start of grids and to 1+end of finest
c                 grid
c                 np(1)=1     : start of coarsest grid
c                 np(levels)  : start of finest grid
c                 np(levels+1): end of finest grid + 1
c  ns       i   ns(1):  aantal pre-smoothing iteraties
c               ns(2):  number of post-smoothing iterations
c  om       i   underrelaxation parameter in smoothing method
c  r        *   residual b-Au (local)
c  s        i   underrelaxation parameters in the nonlinear multigrid
c                 algorithm. See algorithm TG in section 8.2 of the book.
c  tol      i   accuracy requirement:     |Au-b)| < tol*|b|
c  u        o   new approximation of solution
c  ue       i   exact discrete solution
c  ut       i   u~ in nonlinear multigrid algorithm.
c                 On coarse grids input is optional.
c                 It may or may not be updated during multigrid iteration
c                 depending on ib(5) = 1 : ut is updated else not.
c                 For linear problems the treatment of ut makes no difference.
c                 If ut is input on coarse grids it may
c                 have been obtained from nested iteration
c  v        i   initial approximation of solution.
c  wu       i,o wu(k) = work units expended on grid k
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      logical goon, finer, test
      integer i, igamma, ihis, n(10), k
      double precision aa, eps(10), t(10), res0, res1, res2, rfac, anorm
c
c  ****       Remark. The right-hand-sides on the coarse grids, called
c                     f in subroutine MG in section 8.7 in the book, are
c                     stored in b
c  finer      true:   go to finer grid
c             false:  go to coarser grid
c  goon       if (goon) then continue multigrid iteration else terminate
c  test       governs output
c  aa         scratch space
c  i          general loop variable or scratch variable
c  igamma     maximum number of coarse grid corrections
c  ihis       counter for histry array
c  k          general loop variable or scratch variable
c  n          counter of coarse grid iterations
c  eps        bound for residual norm to be reached on finest grid
c               and bound for residual norm to be reached on coarse grids
c               with adaptive schedule
c  t          t(k) < 0 implies coarse grid convergence within tolerance
c                in adaptive schedule
c  res0      l2-norm of initial residual on finest grid
c  res1      l2-norm of old residual on finest grid
c  res2      l2-norm of new residual on finest grid
c  rfac      reduction factor of l2-norm of residual
c
c ***************************************************************************
c
c                 I/O
c
c  output goes to logical unit number i2
c
c ***************************************************************************
c
c            FUNCTIONS
c
c  anorm
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c  axplsy copy diagn dirsol errmg prol resid restri scal smooth
c  
c ***************************************************************************
c
c                METHOD
c
c  Nonlinear multigrid algorithm based on structure diagrams given in
c  section 8.7 of the book.
c  The structure diagram has been modified slightly in order to obtain a
c  correct F-cycle.
c  An additional termination criterion has been built in, namely that the
c    residual on the finest grid satisfies  |Au-b)| < tol*|b|.
c  Additional statements for output, testing etc. that are not included in the
c    structure diagram are bracketed by c1******* and c2*******.
c  The problem to be solved is assumed to be linear.
c  The rudiments of CPU time measurements are included and are commented out
c  by ctime
c ***************************************************************************
c
c         STRUCTURE DIAGRAM OF NONLINEAR MULTIGRID ALGORITHM
c         Implemented in subroutine mg
c         The while statement has been implemented in the standard way with
c           a goto
c
c #==========================================================================#
c #  Choose ut(k), k = 1,2,...K            /* K is index of finest grid */   #
c #  Initialize v(K) = ut(K)               /* K is called levels        */   #
c #__________________________________________________________________________#
c # \                                                                       /#
c # F \                          cycle eq A                               / T#
c #_____\_______________________________________________________________/____#
c # \                                /|   gamma = 7                          #
c # F \       cycle eq V           / T|   eps(K) = tol*|b|                   #
c #_____\________________________/____|   r(K) = A*v -b                      #
c #  gamma = 2      |  gamma = 1      |   t(K) = |r| - eps                   #
c #_________________|_________________|______________________________________#
c #  k = K                                                                   #
c #  n(K) = nmg                                                              #
c #__________________________________________________________________________#
c # \                                                                       /#
c # F \                      cycle eq A                                   / T#
c #_____\_______________________________________________________________/____#
c #  goon = n(K) ge 0                 |    goon = n(K) ge 0 and t(K) gt 0    #
c #___________________________________|______________________________________#
c #  while (goon) do                                                         #
c #  ________________________________________________________________________#
c #  |  finer = n(k) eq 0  or  k eq 1                                        #
c #  |-----------------------------------------------------------------------#
c #  | \                                                                   / #
c #  | F \                      cycle eq A                               / T #
c #  |_____\___________________________________________________________/_____#
c #  |                                |     finer = finer  or  t(K) lt 0     #
c #  |-----------------------------------------------------------------------#
c #  | \                                                                   / #
c #  | F \                        finer                                  / T #
c #  |_____\___________________________________________________________/_____#
c #  |  S ( v ,u, b )                   |\                                  /#
c #  | /*  S: smoothing          */     |F \            k eq 1            / T#
c #  | /*  v:old; u:new; b:rhs   */     |____\__________________________/____#
c #  |  r = b - A*u                     |    |         direct solution       #
c #  |  Choose ut                       |    |-------------------------------#
c #  |  r(k-1) = Rr                     |    | \                           / #
c #  |  b(k-1) = A*ut + s*r(k-1)        |    | F \      cycle eq F       / T #
c #  |  v(k-1) = ut                     |    |_____\___________________/_____#
c #  |----------------------------------|    |      |    gamma = 1           #
c #  | \                              / |__ _|______|________________________#
c #  | F \       cycle eq A         / T | \                                / #
c #  |___|________________________/_____| F \           k lt K           / T #
c #  |   | t(k-1)=|r(k)|-eps(k)         |_____\________________________/_____#
c #  |   | eps(k-1)=delta*s(k-1)*|r(k)| |     |                              #
c #  |___|______________________________|     |         k =k + 1             #
c #  |         k = k - 1                |     |         u=u+P(u-ut)/s        #
c #  |         n(k) = gamma             |     |         S ( u, v, b )        #
c #  |                                  |_____|______________________________#
c #  |                                  |   n(k) = n(k) - 1                  #
c #  |                                  |   u = v                            #
c #  |                                  |------------------------------------#
c #  |                                  | \                                / #
c #  |                                  | F \       cycle eq A           / T #
c #  |                                  |_____\________________________/_____#
c #  |                                  |      |   r = b - Au                #
c #  |                                  |      |   t = |r| - eps             #
c #  |                                  |------------------------------------#
c #  |                                  | \                                / #
c #  |                                  | F \    cycle eq F and k eq K   / T #
c #  |                                  |_____\________________________/_____#
c #  |                                  |      |        gamma = 2            #
c #  |__________________________________|______|_____________________________#
c #  | \                                                                   / #
c #  | F \                       cycle eq A                              / T #
c #  |_____\___________________________________________________________/_____#
c #  |  goon = n(K) ge 0                |  goon = t(K) gt 0  and  n(K) ge 0  #
c #==========================================================================#
c
c ***************************************************************************
c
c Reset the time
c
ctime      t0 = time
c
      test = .false.
      do 2 i = 1, 10
         s(i) = 1d0
    2 continue
      if ( cycle .eq. 'A' ) then
c
c Adaptive schedule : max number of coarse grid corrections allowed is 7
c
         igamma = 7
         eps(levels) = tol * anorm( 2, idim, b, levels, np )
         call resid( idim, levels, np, b, amat, v, r )
         t(levels) = anorm( 2, idim, r, levels, np ) - eps(levels)
      else if ( cycle .eq. 'V' ) then
         igamma = 1
      else if ( cycle .eq. 'W' .or. cycle .eq. 'F' ) then
         igamma = 2
      else
         call errmg ( i2, 1 )
      endif
      k = levels
      n(levels) = nmg

c1**************************
      call resid( idim, levels, np, b, amat, v, r )
      res0 = anorm( 2, idim, r, levels, np )
      res1 = res0
      res2 = res1
      write( i2, '(a,e11.3)' ) ' l2-norm of initial residual',res1
c2**************************

      if ( cycle .eq. 'A' ) then
         goon = ( n(levels) .ge. 0 .and.  t(levels) .gt. 0d0 )
      else
         goon = ( n(levels) .ge. 0 )
      endif

   10 if ( goon ) then
         finer = ( n(k) .eq. 0 .or. k .eq. 1 )
         if ( cycle .eq. 'A' ) then
            finer = ( finer .or. t(k) .le. 0d0 )
         endif
         if ( finer ) then

c1************
            if (test) then
             write(i2,*) ' GOING TO A FINER GRID (from k = ',k,' )'
            endif
c2***********

            if ( k .eq. 1 ) then

c1**************************
               if ( iout(2) .gt. 1 ) then
c
c norm of right-hand-side on coarsest grid
c
                  call diagn( 1, idim, i2, iout(2), k, np, b )
               endif
c2**************************

               call dirsol( idim, i2, k, np, amat, u, b )

c1*****************
cc possibility of non-acurate solving on coarse grid instead of solving
cc with dirsol
cc
cc             call smooth( i2, idim, iout(2), ism, k, np, ns(2),
cc   $                         amat, levels, om, b, v, u )
cc
               if (test) then
                write(i2,*)' k=',k,' solution  A'
                call vecout(i2,idim,k,np,u)
               endif
c2*****************
               if ( cycle .eq. 'F' ) then
                  igamma = 1
               endif
c1**************************
               if ( ihis .lt. 1000 ) then
                  ihis = ihis + 1
                  histry(1,ihis) = k
                  histry(2,ihis) = 1
c                  write( i2, * ) ' history: level = ',histry(1,ihis),
c     $                           ' smoothings = ',histry(2,ihis)
c               else
c                  write( i2, * ) ' array histry too small'
               endif
               if ( iout(2) .gt. 1 ) then
c  
c norm of residual after solution on coarsest grid
c 
                  call resid( idim, k, np, b, amat, u, r )
                  call diagn( 2, idim, i2, iout(2), k, np, r )
               endif
               wu(k) = wu(k) + 2d0**(k-levels)
ct1
               if (test) then
                call resid( idim, k, np, b, amat, u, r )
                write(i2,*)' k=',k,' residual  B'
                call vecout(i2,idim,k,np,r)
               endif
ct2
c2**************************
            endif
            if ( k .lt. levels ) then
c
c go to finer grid
c
c1**************************
               if ( iout(2) .gt. 3 ) then
c
c current solution just before move to finer grid
c
                  call diagn( 3, idim, i2, iout(2), k, np, u )
               endif
               if (test) then
                write(i2,*)' k=',k,' solution before move to finer  C'
                call vecout(i2,idim,k,np,u)
               endif
c2****************************
               k = k + 1
c
c start of part B of multigrid algorithm
c u = u -ut
c
               call axplsy( idim, k-1, np, -1d0, ut, u )
c1**************************
               if ( iout(2) .gt. 3 ) then
c
c correction just before move to finer grid
c
                  call diagn( 4, idim, i2, iout(2), k-1, np, u )
               endif
               if (test) then
                write(i2,*)' k=',k,' correction before move to finer  D'
                call vecout(i2,idim,k-1,np,u)
               endif
c2**************************
c
c  v = Pu
c
               call prol( amat, ib(6), idim, i2, k, np, u, v )
c1**************************
               if ( iout(2) .gt. 3 ) then
c
c prolongated correction just after move to finer grid
c
                  call diagn( 5, idim, i2, iout(2), k, np, v )
               endif
               if (test) then
                write(i2,*)' k=',k,' prolongated correction  E'
                call vecout(i2,idim,k,np,v)
               endif
c2**************************
c
c  u = v/s + u
c
               call axplsy( idim, k, np, 1d0/s(k-1), v, u )
c1**************************
               if ( iout(2) .gt. 1 ) then
c
c residual after coarse grid correction
c
                  call resid( idim, k, np, b, amat, u, r )
                  call diagn( 6, idim, i2, iout(2), k, np, r )
               endif
               if ( iout(2) .gt. 3 ) then
c
c current solution after coarse grid correction
c
                  call diagn( 7, idim, i2, iout(2), k, np, u )
               endif
               if (test) then
                write(i2,*)' k=',k,' current sol. after cg corr.  F'
                call vecout(i2,idim,k,np,u)
               endif
c2**************************
c
c now follows post-smoothing
c
ctime          t1 = time
c
c  u is old, v is new approximation
c
               call smooth( i2, idim, iout(2), ism, k, np, ns(2),
     $                         amat, levels, om, b, u, v)
c1******************************
               if (test) then
                write(i2,*)' k=',k,' sol. after post smoothing  G'
                call vecout(i2,idim,k,np,v)
               endif
ctime          t2 = time
ctime          t2 = (t2-t1)/ns(2)
ctime          write( i2, e11.3 ) ' CPU time for 1 WU =',t2
               if ( ihis .lt. 1000 ) then
                  ihis = ihis + 1
                  histry(1,ihis) = k
                  histry(2,ihis) = ns(2)
c                  write( i2, * ) ' history: level = ',histry(1,ihis),
c     $                           ' smoothings = ',histry(2,ihis)
c               else
c                  write( i2, * ) ' array histry too small'
               endif
               if ( iout(2) .gt. 1 ) then
c
c residual after post-smoothing
c
                  call resid( idim, k, np, b, amat, v, r )
                  call diagn( 8, idim, i2, iout(2), k, np, r )
               endif
               if (test) then
                write(i2,*)' k=',k,' residual after post smoothing  H'
                call resid( idim, k, np, b, amat, v, r )
                call vecout(i2,idim,k,np,r)
               endif
               wu(k) = wu(k) + ns(2)*(2d0**(k-levels))
c2**************************
c
c end of part B of multigrid algorithm
c
            endif
            n(k) = n(k) - 1
c
c  u = v
c
            call copy( idim, k, np, u, v )
c1**************************
c            if ( ib(5) .eq. 1 ) then
c
c ut = u
c
c               call copy( idim, k, np, ut, u )
c            endif
c2******************************
            if ( cycle .eq. 'A' ) then
               call resid( idim, k, np, b, amat, u, r )
               t(k) = anorm( 2, idim, r, k, np ) - eps(k)
            endif
            if (k.eq.levels .and. cycle.eq.'F') then
               igamma = 2
            endif
         else
c
c go to coarser grid
c
c1*************************
            if (test) then
              write(i2,*) ' GOING TO A COARSER GRID (from k = ',k,' )'
            endif
            if ( iout(2) .gt. 1 ) then
c
c              residual before pre-smoothing
c
               call resid( idim, k, np, b, amat, v, r )
               call diagn( 9, idim, i2, iout(2), k, np, r )
            endif
c2**************************
c
c now follows pre-smoothing
c
c1****************************
               if (test) then
              write(i2,*) ' grid number',k,' v before presmoothing  I'
                 call vecout( i2, idim, k, np, v )
                 call resid( idim, k, np, b, amat, v, r )
                 write(i2,*)' k=',k,' residual before presmoothing  J'
                 call vecout(i2,idim,k,np,r)
               endif
c2******************************
            call smooth( i2, idim, iout(2), ism, k, np, ns(1),
     $                      amat, levels, om, b, v, u )
c1******************************
            if (test) then
             write(i2,*) ' solution after presmoothing            K'
             call vecout( i2, idim, k, np, u )
            endif
c2*******************************
            call resid( idim, k, np, b, amat, u, r )
c1********************************
            if (test) then
             write(i2,*) ' residual after presmoothing            L'
             call vecout( i2, idim, k, np, r )
            endif
            if ( ihis .lt. 1000 ) then
               ihis = ihis + 1
               histry(1,ihis) = k
               histry(2,ihis) = ns(1)
c               write( i2, * ) ' history: level = ',histry(1,ihis),
c     $                        ' smoothings = ',histry(2,ihis)
c            else
c               write( i2, * ) ' array histry too small'
            endif
            if ( iout(2) .gt. 1 ) then
c
c print residual after pre-smoothing
c
               call diagn( 10, idim, i2, iout(2), k, np, r )
            endif
            wu(k) = wu(k) + ns(1)*(2d0**(k-levels))
c2**************************

            if ( ib(5) .eq. 1 ) then
c
c ut = restriction(u)
c
               call restri( ib(8), idim, i2, k, np, ut, u )
            endif
            call restri( ib(7), idim, i2, k, np, r, r )

c1**************************
            if (test) then
            write(i2,*) ' restriction of residual after presmoothing M'
             call vecout( i2, idim, k-1, np, r )
            endif
c2**************************

c
c  r = -s*r
c
            call scal( idim, k-1, np, -s(k-1), r )
c
c  b = r -amat*ut
c
            call resid( idim, k-1, np, r, amat, ut, b )
c
c  b = -b
c
            call scal( idim, k-1, np, -1d0, b )
            call copy( idim, k-1, np, v, ut)

c1****************************
            if (test) then
             write(i2,*) ' righthandside for coarser grid          N'
             call vecout( i2, idim, k-1, np, b )
             write(i2,*) ' end of part A of multigrid algorithm'
            endif
c2*****************************

c
c end of part A of multigrid algorithm
c
            if ( cycle .eq. 'A' ) then
               aa = anorm( 2, idim, r, k, np )
               t(k-1) = aa - eps(k)
               eps(k-1) = delta*s(k-1)*aa
            endif
            k = k - 1
            n(k) = igamma
         endif
         if ( cycle .eq. 'A' ) then
            goon = ( t(levels) .gt. 0d0 .and. n(levels). ge. 0 )
         else
            goon = (n(levels) .ge. 0 )
         endif

c1**************************
         if (k.eq.levels) then
               call copy(idim,k,np,ut,u)
               if (test) then
                  write(i2,*) ' new ut'
                  call vecout(i2,idim,k,np,ut)
               endif
               if ( iout(3) .gt. -1 ) then
                  call resid( idim, levels, np, b, amat, u, r )
                  res1 = res2
                  res2 = anorm( 2, idim, r, levels, np )
                  rfac = max(1d-20,res2)/max(1d-20,res1)
                  it = nmg - n(levels)
                  if (it .le. nmg) then
                    write( i2, '(a,i2,a,e11.3)' ) ' iteration number ',
     $                 it, ' reduction factor ', rfac
                    write( i2,'(a,e11.3)')' l2-norm of residual ',res2
                  endif
               endif
               if ( iout(2) .gt. 1 ) then
c
c error at moment of leaving finest grid
c
                  call axplsy( idim, k, np, -1d0 , ue, v )
                  call diagn( 11, idim, i2, iout(2), k, np, v )
                  call copy( idim, k, np, v, u )
               endif
         endif
c2**************************

         goto 10
      endif

c1**************************
      call resid( idim, levels, np, b, amat, u, r )
      res2 = anorm( 2, idim, r, levels, np )
      write(i2,'(a,e11.3)')' l2-norm of res. when leaving mg ',res2
      it = nmg - n(levels)-1
      rfac = (max(1d-12,res2)/max(1d-12,res0))**(1d0/it)
      write(i2,'(a,i2,a,e11.3)')' total iterations ',it,
     $                     ' average reduction factor ', rfac
      if (iout(2) .gt. 0) then
        write(i2,*)' Schedule history'
        do 20 i =1, ihis
          write(i2,'(2i3)') histry(1,i),histry(2,i)
  20    continue
      endif
      if ( iout(2) .gt. 1 ) then
c
c final error
c
         call copy( idim, k, np, v, u )
         call axplsy( idim, k, np, -1d0, ue, v )
         call diagn( 12, idim, i2, iout(2), k, np, v )
      endif
ctime t2 = time
ctime t2 = t2 - t0
ctime write(i2,'(a,e11.3)')' total CPU time = ',t2
c2**************************
c     end of mg
      end

      subroutine nested( amat, b, cycle, delta, histry, ib, idim, ihis,
     $                   iout,  it, ism, i2, levels, nmg, np, ns, om, r, 
     $                   s, tol, u, ue, ut, v, wu )
c **************************************************************************
c
c  Nested iteration algorithm as described in section 8.4 of the book.
c
c **************************************************************************
c
c                  INPUT / OUTPUT PARAMETERS
c
      implicit none
      integer histry(2,1000), ib(13), idim, ihis, iout(4), ism, it, i2,
     $        levels, np(11), nmg, ns(2)
      double precision amat(idim,3), b(idim), delta, om, r(idim), s(10),
     $                 tol, u(idim), ue(idim), ut(idim), v(idim), wu(10)
      character cycle
c
c  amat     i   matrices on all grids
c  b        i   right-hand-side on grid(levels)
c           *   the right-hand-sides on coarse grids are computed by
c               restriction
c  cycle    i   passed to subroutine mg
c  delta    i   passed to subroutine mg
c  histry  o   output of subroutine mg
c  ib       i   passed to subroutine mg
c               ib(11) number of multigrid iterations used in nested
c                 iteration (=gamma~ in program 1 of section 8.4)
c               ib(12) chooses type of prolongation in nested iteration
c               ib(13) chooses type of restriction for coarse right-hand-
c                 sides in nested iteration
c  idim     i   size of matrix and grid vectors
c  ihis     i,o counter for histry array
c  iout     i   governs output
c                 iout(2) governs diagnostic output and output in
c                    subroutine smooth
c                 iout(3) governs performance output
c  ism      i   passed to subroutine mg
c  it       o   passed to subroutine mg
c  i2       i   logical unit number
c  levels   i   total number of grids
c                 1 : coarsest grid;  levels: finest grid
c  np       i   array of pointers to start of grids and to 1+end of finest
c                 grid
c                 np(1)=1     : start of coarsest grid
c                 np(levels)  : start of finest grid
c                 np(levels+1): end of finest grid + 1
c  ns       i   passed to subroutine mg
c  om       i   passed to subroutine mg
c  r        *   passed to subroutine mg
c  s        i   passed to subroutine mg
c  tol      i   passed to subroutine mg
c  u        o   new approximation of solution
c  ue       i   exact discrete solution
c  ut       i,o initial and final approximations on all grids
c  v        *   scratch space (local)
c  wu       i,o   wu(k) = work units expended on grid k
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer  k
c
c ***************************************************************************
c
c                 I/O
c
c  output goes to logical unit number i2
c
c ***************************************************************************
c
c            FUNCTIONS none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c   copy  mg  prol  restri  smooth
c  
c ***************************************************************************
c
c                METHOD
c
c  Nested iteration according to program 1 of section 8.4
c  Additional statements for output, testing etc. that are not included in the
c    description given in the book are bracketed by c1******* and c2*******.
c  The problem to be solved is assumed to be linear.
c  The rudiments of CPU time measurements are included and are commented out
c  by ctime
c
c ****************************************************************************
c
c Reset the time
c
ctime      t0 = time
c
c compute coarse right-hand-sides
c
      do 10 k = levels, 2, -1
         call restri( ib(13), idim, i2, k, np, b, b )
  10  continue
c
c1**************************
      if ( iout(2) .gt. 1 ) then
c
c norm of right-hand-side on coarsest grid
c
         write(i2,*)' nested iteration, grid 1'
         call diagn( 1, idim, i2, iout(2), 1, np, b )
      endif
c2**************************
c
      call smooth( i2, idim, iout, ism, 1, np, ns, amat, levels,
     $                   om, b, ut, v )
      call copy( idim, 1, np, ut, v )
c
c1**************************
      if ( iout(2) .gt. 1 ) then
c  
c norm of residual after solution on coarsest grid
c 
         call resid( idim, 1, np, b, amat, ut, r )
         write(i2,*)' nested iteration, grid 1'
         call diagn( 8, idim, i2, iout(2), 1, np, r )
      endif
      k = 1
      wu(k) = wu(k) + 2d0**(k-levels)
      if ( ihis .lt. 1000) then
         ihis=ihis+1
         histry(1,ihis)=k
         histry(2,ihis)=1
      else
         write(i2,*)' array histry too small'
      endif
c2**************************
c
      do 20 k = 2, levels
         call prol( amat, ib(12), idim, i2, k, np, ut, ut )
c
c1**************************
         if ( iout(2) .gt. 1 ) then
c  
c norm of residual before multigrid improvement
c 
            call resid( idim, k, np, b, amat, ut, r )
            write(i2,*)
     $          ' nested iteration, grid ',k, 'res. before mg corr.'
            call diagn( 13, idim, i2, iout(2), k, np, r )
         endif
c2**************************
c
         call copy( idim, k, np, v, ut )
         call mg( amat, b, cycle, delta, histry, ib, idim, ihis, iout,
     $               it, ism, i2, k, ib(11), np, ns, om, r, s, tol,
     $               u, ue, ut, v, wu )
         call copy( idim, k, np, ut, u )
c
c1**************************
         if ( iout(2) .gt. 1 ) then
c  
c norm of residual after multigrid improvement
c 
            call resid( idim, k, np, b, amat, ut, r )
            write(i2,*)
     $         ' nested iteration, grid ',k, 'res. after mg corr.'
            call diagn( 13, idim, i2, iout(2), k, np, r )
         endif
c2**************************
c
   20 continue
c
c end of nested
c
      end
      double precision function omega( p, t )
c***********************************************************************
c
c   Switching function between central, hybrid and upwind scheme
c
c***********************************************************************
c
c       INPUT/OUTPUT PARAMETERS
c
      implicit none
      integer  t
      double precision p
c
c    t      i  t = 1:  central scheme
c              t = 2:  upwind scheme
c              t = 3:  hybrid scheme
c              t = 4:  hybrid scheme
c              t = something else:  central scheme
c    p      i  mesh peclet number
c
c***********************************************************************
c
c       LOCAL QUANTITIES
c
      double precision om
c
c***********************************************************************
      if( t.eq.1 ) then
         om = 0d0
      else if( t.eq.2 ) then
         om = 1d0
      else if( t.eq.3 .or. t.eq.4 ) then
         om = 5d0*p - 9.5d0
         om = min(om,1d0)
         om = max(om,0d0)
      else
         om = 0d0
      endif
      omega = om
c  end of omega
      end

      subroutine pointr( idis, iout, levels, nc, np, i2 )
c***********************************************************************
c
c        computation of pointer array np
c
c***********************************************************************
c
c       INPUT/OUTPUT PARAMETERS
c
      implicit none
      integer idis, iout, levels, nc, np(11), i2
c
c    idis   i  idis = 1:  vertex-centered multigrid
c              idis = 2:  cell-centered multigrid
c    iout   i  governs output
c    levels i  description in main program
c    nc     i  description in main program
c    np     o  description in main program
c
c***********************************************************************
c
c       LOCAL QUANTITIES
c
      integer i, j
c
c***********************************************************************

      np(1) = 1
      j=nc
      if ( idis .eq. 1 ) then
c
c Vertex centered
c
         do 10 i = 2,levels
            np(i) = np(i-1) + j
            j = 2*j - 1
  10     continue
         np(levels+1) = np(levels) + j
      else if ( idis .eq. 2 ) then
c
c Cell centered
c
         do 11 i = 2,levels
            np(i)= np(i-1) + j
            j = 2*j
  11     continue
         np(levels+1) = np(levels) + j
      else
c
c Wrong value of idis = ib(4)
c
         write( i2, * ) ' idis wrong in subroutine pointer'
      endif
c
c Check for output
c
      if ( iout .gt. 2 ) then
c
c Output required
c
         write( i2, * ) ' grid pointer array'
         write( i2, '(2i5)' ) ( i,np(i), i=1,levels)
      endif
c
c end of pointer
c
      end
      subroutine prol( amat, ic, idim, i2, k, np, u, v )
c **************************************************************************
c
c     computes prolongation of u from grid k-1 to grid k
c       v = Pu
c
c ****************************************************************************
c
c                  INPUT/OUTPUT PARAMETERS
c
      implicit none
      integer ic, idim, i2, k, np(11)
      double precision amat(idim,3), u(idim), v(idim)

c
c     ic   i  governs choice of type of prolongation; see below
c     u    i  function on grid(k-1) to be prolongated
c     v    o  prolongation of u
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i, ipu, ipv, nu
c ***************************************************************************
c
c                 I/O
c    output goes to logical unit number i2
c
c ***************************************************************************
c
c            FUNCTIONS
c
c             none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c     none;  possible to build this with BLAS routines
c
c ***************************************************************************
c
c            METHOD
c
c   ic = 1:  vertex-centered linear interpolation: equation (5.3.1) of the book
c   ic = 2:  cell-centered piecewise constant interpolation:
c               equation (5.3.5) of the book
c   ic = 3:  cell-centered linear interpolation with Dirichlet boundary
c               modification:  equation (5.3.6) of the book with w=0
c               in equation (5.3.24) at boundaries
c   ic = 4:  cell-centered linear interpolation with Neumann boundary
c               modification:  equation (5.3.6) of the book with stencil
c               [P*] = (1/4)[ 0  4  3  1 ] at boundary x=0 and similar at x=1
c   ic = 5:  vertex-centered operator dependent prolongation
c                  equations (5.4.2), (5.4.3) of the book
c
c ***************************************************************************
      ipv = np(k) - 1
      ipu = np(k-1) - 1
      nu = ipv - ipu
c
c many of the following loops can be done with the BLAS routines DCOPY,
c DAXPY and DSCALE. So the loops are splitted into simple parts.
c
      if ( ic .eq. 1 ) then
c
c vertex centered linear interpolation:
c
c        v: 1   2   3   4   5   6   7   8   9
c           | /   \ | /   \ | /   \ | /   \ |  [0 2 1] [1 2 1] [1 2 0]  *.5
c        u: 1       2       3       4       5
c
         do 10 i = 1, nu
            v(ipv+2*i-1) = u(ipu+i)
  10     continue
         do 12 i = 1, nu
            v(ipv+2*i) = 0.5d0*u(ipu+i)
  12     continue
         do 14 i = 1, nu
            v(ipv+2*i) = v(ipv+2*i) + 0.5d0*u(ipu+i+1)
  14     continue
      else if ( ic .eq. 2 ) then
c
c cell centered piecewise constant interpolation:
c
c        v: 1   2   3   4   5   6   7   8   9   
c            \ /     \ /     \ /     \ /
c        u:   1       2       3       4
c
         do 20 i = 1, nu
            v(ipv+2*i-1) = u(ipu+i)
   20    continue
         do 22 i = 1, nu
            v(ipv+2*i) = u(ipu+i)
   22    continue
      else if ( ic .eq. 3 .or. ic .eq. 4 ) then
c
c cell centered linear interpolation Dirichlet/Neumann boundary
c
c        v: 1   2   3   4   5   6   
c              ____/ \ / \____        [ 0 2 3 1 ] [ 1 3 3 1 ] [ 1 3 2 0 ] *.25
c        u:   1       2       3      
c
         do 30 i = 1, 2*nu
            v(ipv+i) = 0d0
   30    continue
         do 32 i = 1, nu
            v(ipv+2*i-1) = v(ipv+2*i-1) + .75d0*u(ipu+i)
   32    continue
         do 34 i = 1, nu
            v(ipv+2*i) = v(ipv+2*i) + .75d0*u(ipu+i)
   34    continue
         do 36 i = 2, nu
            v(ipv+2*i-2) = v(ipv+2*i-2) + .25d0*u(ipu+i)
   36    continue
         do 38 i = 1, nu - 1
            v(ipv+2*i+1) = v(ipv+2*i+1) + .25d0*u(ipu+i)
   38    continue
         if ( ic .eq. 3 ) then
c
c Dirichlet boundary correction
c
            v(ipv+1) = 0.5*u(ipv+1)
            v(ipv+2*nu) = 0.5*u(ipv+nu)
         else
c
c Neumann boundary correction
c
            v(ipv+1) = u(ipv+1)
            v(ipv+2*nu) = u(ipv+nu)
         endif
      else
         write(i2,'(a,i2,a)') 'prol: ic = ',ic,' out of range '
         write(*,*) ' aborted'
         stop
      endif
c
c     end of prol
c
      end
      subroutine resid( idim, k, np, b, amat, u, r )
c **************************************************************************
c
c     computes residual r = b - amat*u  on grid k
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, k, np(11)
      double precision b(idim), amat(idim,3), u(idim), r(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
c ***************************************************************************
c
c                 I/O
c                none
c
c ***************************************************************************
c
c            FUNCTIONS
c
c             none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c   BLAS level 2 routine _GBMV for vector computers; has not yet been
c    implemented
c
c ***************************************************************************
      i=np(k)
      r(i) = b(i) - amat(i,2)*u(i) - amat(i,3)*u(i+1)
      do 10 i = np(k)+1, np(k+1)-2
         r(i) = b(i) - amat(i,1)*u(i-1) - amat(i,2)*u(i) - 
     $                 amat(i,3)*u(i+1)
  10  continue
      i = np(k+1)-1
      r(i) = b(i) - amat(i,1)*u(i-1) - amat(i,2)*u(i)
c     end of resid
      end
      subroutine restri( ic, idim, i2, k, np, v, u )
c***********************************************************************
c
c     computes restriction of u from grid k to grid k-1
c       v = Ru
c
c ****************************************************************************
c
c                  INPUT/OUTPUT  PARAMETERS
c
      implicit none
      integer ic, idim, i2, k, np(11)
      double precision u(idim), v(idim)
c
c  ic   i    chooses type of restriction
c            = 1:  vertex-centered restriction according to the one-
c                    dimensional version of equation (5.3.19) of the book
c                    stencil = 0.5 * [1 2 1]
c                    boundary modification type Dirichlet: at x=0:
c                    stencil = 0.25 * [0 2 1];
c                    at x=1 :stencil = 0.25 * [1 2 0]
c            = 2:  vertex-centered restriction according to the one-
c                    dimensional version of equation (5.3.19) of the book
c                    stencil = 0.5 * [1 2 1]
c                    boundary modification type Neumann: at x=0:
c                    stencil = 0.5 * [0 3 1];
c                    at x=1 :stencil = 0.5 * [1 3 0]
c            = 3:  cell-centered restriction according to equation (5.3.23)
c                    of the book
c                    stencil =  [1  1]
c            = 4:  cell-centered restriction according to equation (5.3.24)
c                    of the book
c                    stencil = (1/4)*[1 3 3 1]
c                    boundary modification type Dirichlet: at x=0:
c                    stencil = (1/4)*[0 3 3 1];
c                    at x=1 :stencil = 0.5*[1 3 3 0]
c            = 5:  cell-centered restriction according to equation (5.3.24)
c                    of the book
c                    stencil = (1/4)*[1 3 3 1]
c                    boundary modification type Neumann: at x=0:
c                    stencil = (1/4) * [0 4 3 1];
c                    at x=1 :stencil = 0.5 * [1 3 4 0]
c            = 6:  vertex-centered operator-dependent restriction according
c                    to equation (5.4.22) of the book
c                    stencil = [a1 a2 a3]
c                    boundary modification type Neumann: at x=0:
c                    a1 = 0, a2:= 2*a2-a3
c                    at x=1 : a3=0, a2:=2*a2-a1
c            = 7:  vertex-centered operator-dependent restriction according
c                    to equation (5.4.22) of the book
c                    stencil = [a1 a2 a3]
c                    boundary modification type Dirichlet: at x=0: a1 = 0
c                    at x=1 : a3 = 0
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i, ipu, ipv, nv
c ***************************************************************************
c
c                 I/O
c    output goes to logical unit number i2
c
c ***************************************************************************
c
c            FUNCTIONS
c
c             none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c     version for vector computers with BLAS routines
c     has been commented out with cv
c
c ***************************************************************************
c
c            METHOD
c
c   See above description of input parameter ic
c   The algorithm has been designed to be easily vectorizable and can be
c     replaced immediately by calls of BLAS routines.
c   Calls to BLAS have been commented out with cv
c
c ***************************************************************************
      ipu = np(k) - 1
      ipv = np(k-1) - 1
      nv = ipu - ipv
c
c much of the following loops can be done with the BLAS routines DCOPY, 
c DAXPY and DSCALE. So the loops are splitted into simple parts.
c
      if ( ic .eq. 1 .or. ic.eq.2 ) then
c
c vertex centered restriction Dirichlet/Neumann boundary:
c
c        v: 1     2     3     4     5
c           | \ / | \ / | \ / | \ / |     [0 2 1] [1 2 1] [1 2 0] *.5
c        u: 1  2  3  4  5  6  7  8  9
c
         v(ipv+1) = 0d0
         do 10 i = 2, nv
            v(ipv+i) = 0.5d0*u(ipu+2*i-2)
  10     continue
         do 12 i = 1, nv
            v(ipv+i) = v(ipv+i) + u(ipu+2*i-1)
  12     continue
         do 14 i = 1, nv - 1
            v(ipv+i) = v(ipv+i) + 0.5d0*u(ipu+2*i)
  14     continue
         if ( ic .eq. 1 ) then
c
c vertex centered restriction Dirichlet boundary:
c
            v(ipv+1) = 0.5d0*v(ipv+1)
            v(ipv+nv) = 0.5d0*v(ipv+nv)
         endif
         if ( ic .eq. 2 ) then
c
c vertex centered restriction Neumann boundary:
c
c           v: 1     2     3     4     5
c              | \ / | \ / | \ / | \ / |     [0 3 1] [1 2 1] [1 3 0] *.5
c           u: 1  2  3  4  5  6  7  8  9
c
            v(ipv+1) = v(ipv+1)+0.5*u(ipu+1)
            v(ipv+nv) = v(ipv+nv)+0.5*u(ipu+2*nv-1)
         endif
      else if ( ic .eq. 3 ) then
c
c cell centered restriction piecewise constant
c
c        v:   1       2       3       4
c            / \     / \     / \     / \      [ 1 1 ] [ 1 1 ] [ 1 1 ]
c        u: 1   2   3   4   5   6   7   8   9   
c
         do 30  i = 1, nv
            v(ipv+i) = u(ipu+2*i-1)
   30    continue
         do 32 i = 1, nv
            v(ipv+i) = v(ipv+i) + u(ipu+2*i)
   32    continue
      else if ( ic .eq. 4 .or. ic .eq. 5 ) then
c
c cell centered restriction Dirichlet/Neumann boundary
c
c        v:   1   ___ 2 ___   3      
c                /   / \   \          [ 0 3 3 1 ] [ 1 3 3 1 ] [ 1 3 3 0 ] *.25
c        u: 1   2   3   4   5   6   
c
         v(ipv+1) = 0d0
         do 40 i = 2, nv
            v(ipv+i) = 0.25d0*u(ipu+2*i-2)
  40     continue
         do 42 i = 1, nv
            v(ipv+i) = v(ipv+i) + 0.75d0*u(ipu+2*i-1)
  42     continue
         do 44 i = 1, nv
            v(ipv+i) = v(ipv+i) + 0.75d0*u(ipu+2*i)
  44     continue
         do 46 i = 1, nv - 1
            v(ipv+i) = v(ipv+i) + 0.25d0*u(ipu+2*i+1)
  46     continue
         if ( ic .eq. 5 ) then
c
c cell centered restriction Neumann boundary
c
c           v:   1   ___ 2 ___   3      
c                   /   / \   \       [ 0 4 3 1 ] [ 1 3 3 1 ] [ 1 3 4 0 ] *.25
c           u: 1   2   3   4   5   6   
c
            v(ipv+1) = v(ipv+1) + 0.25d0*u(ipu+1)
            v(ipv+nv) = v(ipv+nv) + 0.25d0*u(ipu+2*nv)
         endif
      else
         write(i2,'(a,i2,a)')'restri: ic = ',ic,' out of range '
         write(*,*) ' aborted'
         stop
      endif
c
c     end of restri
c
      end
      double precision function s(icase,ss,x)
c***********************************************************************
c
c     user provided function to fill the right hand side as a function
c     of the x-coordinate
c
c***********************************************************************
      integer icase
      double precision ss,x
      if ( icase .eq. 1 ) then
         s = ss
      else if ( icase .eq. 2) then
         s = ss
      else if ( icase .eq. 3 ) then
         s = ss
      else
         s = ss
      endif
      end
      subroutine scal( idim, k, np, a, u )
c***********************************************************************
c
c     u = a*u    on grid k
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, k, np(11)
      double precision a, u(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c   BLAS routine DSCAL may be used on vector computers - has been
c    commented out with cv
c
c ***************************************************************************
c
      do 10 i = np(k), np(k+1)-1
         u(i) = a*u(i)
  10  continue
c
c   vector version
c
cv      call DSCAL(np(k+1)-np(k),a,u(np(k)),1)
c     end of scal
      end
      subroutine smooth( i2, idim, iout, ism, k, np, ns, a, levels,
     $                   om, b, u, v )
c **************************************************************************
c
c   smoothing of equation Au = b  on grid k
c   old approximate solution is u
c   new approximate solution is v
c
c **************************************************************************
c
c                  INPUT / OUTPUT PARAMETERS
c
      implicit none
      integer i2, idim, iout, ism, k, np(11), ns, levels
      double precision a(idim,-1:1), b(idim), om, u(idim), v(idim)
c
c  a        i   matrix
c  b        i   right-hand-side
c  om       i   underrelaxation parameter in smoothing method
c  u        i   old approximation of solution
c  v        o   new approximation of solution
c  i2       i   logical device number
c  idim     i   size of matrix and grid vectors
c  iout     i   governs output
c  ism      i   chooses smoothing method
c               ism = 1  Jacobi with damping parameter om; section 4.3
c                  of the book and equation (4.1.7); damping is not applied
c                  at boundaries
c               ism = 2  forward Gauss-Seidel; section 4.3 of the book
c               ism = 3  backward Gauss-Seidel; section 4.3 of the book
c               ism = 4  symmetric Gauss-Seidel; section 4.3 of the book
c               ism = 5  white-black; section 4.3 of the book
c               ism = 6  Jacobi with damping parameter om; section 4.3
c                  of the book and equation (4.1.7); damping is applied
c                  uniformly in the interior and at the boundaries.
c  k        i   grid level
c  np       i   array of pointers to start of grids and to 1+end of finest
c                 grid
c                 np(1)=1     : start of coarsest grid
c                 np(levels)  : start of finest grid
c                 np(levels+1): end of finest grid + 1
c  ns       i   number of smoothing iterations
c
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i, n, ifirst, ilast, itotal
      double precision  om1
      logical odd
c ***************************************************************************
c
c                 I/O
c
c  output goes to logical unit number i2
c
c ***************************************************************************
c
c            FUNCTIONS     none
c
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c                 copy
c  
c ***************************************************************************
c
c                METHOD
c
c   see information on ism above
c
c ****************************************************************************
      ifirst = np(k)
      ilast = np(k+1) - 1
      itotal = ilast - ifirst
      odd = .false.
      if ( mod(itotal,2) .eq. 1 ) odd = .true.
      if ( a(ifirst,-1) .ne. 0d0 .or. a(ilast,1) .ne. 0d0 ) then
         write(i2,*)' smooth: matrix a wrong at boudaries'
         write(*,*)' aborted'
         stop
      endif
c
c  first copy the initial solution u to v
c
      call copy( idim, k, np, v, u )
      if ( ism .eq. 1 ) then
c
c  damped Jacobi; no damping at boundaries
c
         do 10 n = 1, ns
             i = np(k)
             v(i) = (b(i)-a(i,1)*u(i+1))/a(i,0)
             i = np(k+1)-1
             v(i) = (b(i)-a(i,-1)*u(i-1))/a(i,0)
             do 5 i = np(k)+1, np(k+1) -2
               v(i) = (b(i)-a(i,-1)*u(i-1)-a(i,1)*u(i+1))/a(i,0)
               v(i) = om*v(i)+(1d0-om)*u(i)
c       write(i2,'(a,e11.3)')' v after',v(i)
    5       continue
c            write(i2,*)' v after smoothing',n
c            call vecout( i2, idim, k, np, v )
            call copy( idim, k, np, u, v )
   10    continue
c
      else if ( ism .eq. 2 ) then
c
c  forward Gauss-Seidel
c
         do 20 n = 1, ns
            i = ifirst
            v(i) = ( b(i) - a(i,1)*v(i+1) ) / a(i,0)
            do 15 i = ifirst + 1, ilast - 1
               v(i) = ( b(i) - a(i,1)*v(i+1) - a(i,-1)*v(i-1) ) / a(i,0)
   15       continue
            i = ilast
            v(i) = ( b(i) - a(i,-1)*v(i-1) ) / a(i,0)
   20    continue
      else if ( ism .eq. 3 ) then
c
c  backward Gauss-Seidel
c
         do 30 n = 1, ns
            i = ilast
            v(i) = ( b(i) - a(i,-1)*v(i-1) ) / a(i,0)
            do 25 i = ilast - 1, ifirst + 1, -1
               v(i) = ( b(i) - a(i,1)*v(i+1) - a(i,-1)*v(i-1) ) / a(i,0)
   25       continue
            i = ifirst
            v(i) = ( b(i) - a(i,1)*v(i+1) ) / a(i,0)
   30    continue
      else if ( ism .eq. 4 ) then
c
c  symmetric Gauss-Seidel ( first forward and then backward )
c
         i = ifirst
         v(i) = ( b(i) - a(i,1)*v(i+1) ) / a(i,0)
         do 40 n = 1, ns
            do 34 i = ifirst + 1, ilast - 1
               v(i) = ( b(i) - a(i,1)*v(i+1) - a(i,-1)*v(i-1) ) / a(i,0)
   34       continue
            i = ilast
            v(i) = ( b(i) - a(i,-1)*v(i-1) ) / a(i,0)
            do 36 i = ilast - 1, ifirst + 1, -1
               v(i) = ( b(i) - a(i,1)*v(i+1) - a(i,-1)*v(i-1) ) / a(i,0)
   36       continue
            i = ifirst
            v(i) = ( b(i) - a(i,1)*v(i+1) ) / a(i,0)
   40    continue
      else if ( ism .eq. 5 ) then
c
c  damped white-black
c
         if ( om .le. 1d-12 ) then
            write(*,*) 'smooth: om = ',om, 'has wrong value'
            stop
         endif
         om1 = (1d0 - om)/om
         do 50 n = 1, ns
c
c odd numbers first
c
            i = ifirst
            v(i) = ( om1*a(i,0)*v(i) - a(i,1)*v(i+1) + b(i) ) /a(i,0)
            do 42 i = ifirst + 2, ilast - 1, 2
               v(i) =  om*( - a(i,-1)*v(i-1) + om1*a(i,0)*v(i)
     $                               - a(i,1)*v(i+1) + b(i) ) /a(i,0)
   42       continue
            if ( odd ) then
               i = ilast
               v(i) =  om*( - a(i,-1)*v(i-1) 
     $                      + om1*a(i,0)*v(i) + b(i) ) /a(i,0)
            endif
c
c even numbers next
c
            do 44 i = ifirst + 1, ilast - 1, 2
               v(i) =  om*( - a(i,-1)*v(i-1) + om1*a(i,0)*v(i)
     $                           - a(i,1)*v(i+1) + b(i) ) /a(i,0)
   44       continue
            if ( .not. odd ) then
               i = ilast
               v(i) =  om*( - a(i,-1)*v(i-1) 
     $                      + om1*a(i,0)*v(i) + b(i) ) /a(i,0)
            endif
   50    continue
c
      else if ( ism .eq. 6 ) then
c
c  damped Jacobi
c
         do 60 n = 1, ns
c            write(i2,*)' b'
c            call vecout( i2, idim, k, np, b )
c            write(i2,*)' u before smoothing'
c            call vecout( i2, idim, k, np, u )
c            write(i2,*)' v before smoothing'
c            call vecout( i2, idim, k, np, v )
             i = np(k)
             v(i) = (b(i)-a(i,1)*u(i+1))/a(i,0)
             v(i) = om*v(i)+(1d0-om)*u(i)
             i = np(k+1)-1
             v(i) = (b(i)-a(i,-1)*u(i-1))/a(i,0)
             v(i) = om*v(i)+(1d0-om)*u(i)
             do 65 i = np(k)+1, np(k+1) -2
               v(i) = (b(i)-a(i,-1)*u(i-1)-a(i,1)*u(i+1))/a(i,0)
               v(i) = om*v(i)+(1d0-om)*u(i)
c       write(i2,'(a,e11.3)')' v after',v(i)
   65       continue
c            write(i2,*)' v after smoothing',n
c            call vecout( i2, idim, k, np, v )
            call copy( idim, k, np, u, v )
   60    continue
      else
         write(i2,*)'smooth: ism out of range'
         write(*,*)' aborted'
         stop
      endif
c
c end of smooth
c
      end

      double precision function u(i2,ib,icase,x,f0,f1,aa,bb)
c
c  Exact solution as a function of x of convection-diffusion equation
c  Implemented only for icase = 2; c = 0.
c  u = c1 + c2 * exp( bb * x / a)
      implicit none
      integer icase, ib(13), i2
      double precision x, f0, f1, aa,bb, e, c1, c2
      if ( icase .eq. 2 ) then
         if ( ib(1).eq.1 .and. ib(2).eq.1 ) then
             e = exp(-bb/aa)
             c2 = (f1 - f0)/(1 - e)
             c1 = f0 - c2*e
             u = c1 + c2 * exp (bb*(x-1)/aa)
         else if ( ib(1).eq.1 .and. ib(2).eq.2) then
             c2 = f1*aa/bb
             c1 = f0 - c2*exp(-bb/aa)
             u = c1 + c2 * exp (bb*(x-1)/aa)
         else if ( ib(1).eq.2 .and. ib(2).eq.1 ) then
             c2 = aa*f0/bb
             c1 = f1 - c2*exp(bb/aa)
             u = c1 + c2 * exp (bb*x/aa)
         else
            u=0d0
            write(i2,*) ' Exact solution not implemented for this case'
         endif
      else
         u = 0d0
         write( i2,* )  ' Exact solution not implemented for this case'
      endif
      end
      subroutine uexact( i2, icase, idim, k, ib, np, uce,f0,f1,aa,bb)
c***********************************************************************
c
c  Programmer    F.N. van de Vosse
c  Version 1.0   date 18-01-92 
c
c***********************************************************************
c
c     uce = exact continous solution on grid k
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer i2, icase, idim, k, np(11), ib(13)
      double precision f0, f1, uce(idim),aa,bb
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i
      double precision h, x, u
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c   u(icase,x) user provided function giving the exact solution in x
c
c ***************************************************************************
      h = 1d0/dble( np(k+1) - np(k) - 1 )
      if ( ib(4) .eq. 2 ) h = 1d0/dble( np(k+1) -np(k) )
      do 10 i = np(k), np(k+1)-1
         x = dble(i-np(k))*h
         if ( ib(4) .eq. 2 ) x = (dble(i-np(k))+.5d0)*h
         uce(i) = u(i2,ib,icase,x,f0,f1,aa,bb)
  10  continue
c
c end of uexact
c
      end
      subroutine vecout( i2, idim, k, np, v )
c **************************************************************************
c
c     prints vector v  on grid k
c
c ****************************************************************************
c
c                  INPUT  PARAMETERS
c
      implicit none
      integer idim, i2, k, np(11)
      double precision v(idim)
c ***************************************************************************
c
c                 LOCAL QUANTITIES
c
      integer i, n, m, ii
c **************************************************************************
c
c            SUBROUTINES CALLED
c
c                none
c ***************************************************************************
c
c                 I/O
c    output goes to logical unit number i2
c
c ***************************************************************************
      write( i2, * )' vector at grid', k
      n = np(k+1)-np(k)
      m = n/4
      do 10 i= 1, 4*m, 4
         ii = i+np(k)-1
         write( i2, '(4(i3,e11.3))' )
     $          i, v(ii), i+1, v(ii+1), i+2, v(ii+2), i+3, v(ii+3)
  10  continue
c
      write( i2, '(4(i3,e11.3))' ) (i,v(i+np(k)-1),i=4*m+1,n)
c
c end of vecout
c
      end