subroutine dbihar(a,b,m,bda,bdb,bdc,bdd,c,d,n,f,idf,
+ alpha,beta,iflag,tol,itcg,w,lw)
c
integer m,n,idf,iflag,itcg,lw
double precision a,b,c,d,alpha,beta,tol
double precision bda(*),bdb(*),bdc(*),bdd(*),f(idf,*),w(*)
c
c
c this subroutine solves the equation
c
c u + 2*u + u + alpha*(u +u ) + beta*u = f(x,y)
c xxxx xxyy yyyy xx yy
c
c
c in the rectangle a < x < b and c < y < d ,
c
c subject to boundary conditions of the first
c kind, u and the exterior normal derivative
c of u must be specified at the boundary.
c
c the solution time is essentially proportional to the
c number of gridpoints when the conjugate gradient
c option is used.
c
c this is version 2.0, april 1984.
c
c this code is written by petter bjorstad, the author
c acknowledges paul n swarztrauber for the use of his
c fast fourier transform package and the linpack project
c for the use of linear equation solvers.
c
c this code and the mathematical theory behind it, is
c described in detail in:
c
c numerical solution of the biharmonic equation.
c
c ph.d. dissertation, stanford university 1980.
c copyright 1980, petter e. bjorstad.
c
c
c any comments, questions or suggestions are most welcome
c and should be directed to :
c
c petter e. bjorstad
c
c veritas research
c p.o. box 300
c n-1322 hovik
c norway
c
c description of input variables.
c
c a,b,c and d. defines the rectangle.
c see above.
c
c m the number of interior grid points
c in the x-direction. the interval
c a.le.x.le.b is divided in (m+1) panels
c each of width (b-a)/(m+1).
c m must be odd and at least 3.
c the method is more efficient
c if m+1 is a product of small primes.
c
c n the number of interior grid points
c in the y-direction. the interval
c c.le.y.le.d is divided in (n+1) panels
c each of width (d-c)/(n+1).
c n must be odd and at least 3. the method is
c somewhat faster if n+1 is a product of small
c primes. it is also advisable to take n less
c than or equal to m.
c (for max speed and min storage.)
c
c alpha the constant alpha in the equation.
c
c beta the constant beta in the equation.
c
c bda array of dimension n containing the values
c of the exterior normal derivative on the
c side x=a ,y=c+j*(d-c)/(n+1) ,j=1,..n .
c
c bdb array of dimension n containing the values
c of the exterior normal derivative on the
c side x=b ,y=c+j*(d-c)/(n+1) ,j=1,..n .
c
c bdc array of dimension m containing the values
c of the exterior normal derivative on the
c side y=c ,x=a+i*(b-a)/(m+1) ,i=1,..m .
c
c bdd array of dimension m containing the values
c of the exterior normal derivative on the
c side y=d ,x=a+i*(b-a)/(m+1) ,i=1,..m .
c
c f array of dimension at least (m+2,n+2).
c f(i,j) must contain the values of the right
c hand side function f(x,y) evaluated at the
c points (x ,y ), where
c i j
c x =a+(i-1)*(b-a)/(m+1) ,i=2,..m+1 .
c i
c y =c+(j-1)*(d-c)/(n+1) ,j=2,..n+1 .
c j
c
c f(1,j) must contain the boundary values along
c the side x=a, y=y ,j=2,..n+1 .
c j
c f(m+2,j) must contain the boundary values along
c the side x=b, y=y ,j=2,..n+1 .
c j
c f(i,1) must contain the boundary values along
c the side y=c, x=x ,i=2,..m+1 .
c i
c f(i,n+2) must contain the boundary values along
c the side y=d, x=x ,i=2,..m+1 .
c i
c
c idf rowdimension of array f as declared in
c the calling program.
c
c tol the conjugate gradient iteration is
c terminated using this parameter.
c the error in the solution of the discrete
c approximation (not to be confused with the
c truncation error) can be expected to be of the
c same order of magnitude provided that the
c function f has norm of magnitude unity.
c a good choice for tol is taking it a few
c orders of magnitude less than the expected
c truncation error.
c if the right hand side f in the given problem
c is many orders of magnitude smaller or bigger
c than one, then it is recommended that the user
c scales his problem.
c tol is a dummy variable if iflag is 3 or 4.
c
c iflag
c =1 this option is not available in version 2.0 .
c
c =2 if this is the first solution on this grid
c using conjugate gradients with fourier trans-
c forms in both coordinate directions.
c there are no parameter restrictions, but
c the routine is only guaranteed to work when
c the discrete approximation is positive
c definite. this is always the case when alpha
c is nonpositive and beta is nonnegative.
c in other cases the user should monitor the
c output parameter itcg. if it stays larger
c than 15, the alternative iflag=4 is
c recommended.
c a call with iflag=2 restarts the low rank
c approximations used to speed up convergence.
c
c =3 if this is the first solution on this grid
c using cholesky factorization.
c the parameters must satisfy alpha.le.0 ,
c beta.ge.0. (if this is violated the code
c will change iflag to 4.)
c
c =4 if this is the first solution on this grid
c using an indefinite symmetric factorization.
c there are no parameter restrictions, but
c an error return will occur if the discrete
c system is computationally singular.
c
c description of output variables.
c
c f contains the solution u of the discrete
c system in all gridpoints (x ,y ),i=1,..m+2 ,
c i j j=1,..n+2 .
c
c iflag
c
c normal returns. iflag will return with a value appropriate
c for repeated calls. it need normally not
c be changed by the user between calls to
c dbihar.
c however, if a sequence of problems is being
c solved where the parameters alpha or beta
c changes then itcg should be monitored and
c iflag should be reset to 2 when itcg
c increases (say above 15).
c also a change between the direct and
c iterative version must be set explicitly.
c
c if the code changes the value of iflag due
c to a change in the user input, a warning is
c printed indicating the reason for the change.
c
c iflag=2 new initial solution (see above).
c iflag=3 new initial solution (see above).
c iflag=4 new initial solution (see above).
c iflag=6 repeated solution after iflag=2.
c iflag=7 repeated solution after iflag=3.
c iflag=8 repeated solution after iflag=4.
c
c error returns. if iflag returns a negative value then
c an error was detected. the computed f(i,j)
c should be considered wrong. an error message
c giving the value of iflag is printed.
c
c =-1 n and/or m was even or less than 3.
c =-2 a.ge.b and/or c.ge.d .
c =-3 idf.lt.m+2 or lw is too small.
c =-4 linpack failure in cholesky-factorization.
c this should not occur,check input carefully.
c =-5 linpack detected a computationally singular
c system using the symmetric indefinite
c factorization.
c =-6 the conjugate gradient iteration failed to
c converge in 30 iterations. the probable
c cause is an indefinite or near singular
c system. try using iflag=4. note that tol
c returns an estimate of the residual in
c the current conjugate gradient iteration.
c
c tol an upper bound on the residuals obtained in
c the conjugate gradient iterations.
c tol will therefore normally be unchanged.
c
c itcg the number of conjugate gradient iterations.
c if this is large (say 20) then a direct
c solution using iflag =4 may be considered.
c
c description of workspace.
c
c lw integer indicating the number of elements
c supplied in the workspace w.
c lw depends on iflag in the following way:
c (this can be improved slightly,version 3.0 ??)
c
c iflag= 2: lw must be at least max(7*n,3*m)+2*(n+m)+19.
c if only one problem is solved on the grid then
c lw should be given its minimum value. (for
c maximum speed.)
c if several problems are to be solved then
c any larger lw will reduce the execution
c time for subsequent problems.(a low rank
c correction will be computed and used to
c improve the preconditioning that is used.)
c the code will not make use of lw larger than
c max(7*n,3*m)+2*(n+m)+19+20*(n+3) under any
c circumstance.
c
c iflag=3,4: lw must be at least max(3*m,4*n)+4*n+2*m
c +0.5*(n+1)**2+19 .
c
c
c w a one dimensional array with at least
c lw elements.
c
c subroutines and functions
c included in this package.
c
c 1. biharmonic: dbihar, dstart, dftrnx, dftrny,
c dbislf, dbisld, dpentf, dmatge,
c dtrigi, dhzeri, dhzero, dconju,
c dcmult, dpreco, dupdat.
c
c 2. fourier dsinti, drffti, drfti1,
c transform: dsint, drfftf, drftf1, dradf2,
c dradf3, dradf4, dradf5, dradfg.
c (by p.n. swarztrauber.)
c
c 3. linpack: dspfa, dspsl, dppfa, dppsl.
c
c 4. blas: idamax, daxpy, dcopy, ddot,
c dscal, dswap.
c
c 5. fortran: mod,min,max,abs,
c atan,cos,sin,sqrt.
c
c local.
c
c biharmonic: dstart, dftrnx, dftrny, dbislf, dbisld
c fortran: mod,min,max
c
integer i1,i2,i3,i4,i5,i6,i7,i8
integer nold,mold,iwf,iwl,il1,il2
integer maxi
double precision dx,dy,del,alf,bet
double precision dxo,dyo,alfo,beto,wfo,wlo
c
c check input for mistakes.
c
il1 = max(7*n,3*m)+2*(n+m)
il2 = max(4*n,3*m)+4*n+2*m+(n+1)**2/2+19
if(n.lt.3.or.m.lt.3) iflag = -1
if(mod(m,2).eq.0.or.mod(n,2).eq.0) iflag = -1
if(a.ge.b.or.c.ge.d) iflag = -2
if(idf.lt.m+2.or.lw.lt.il1) iflag = -3
if(iflag.lt.0) go to 1000
c
dx = (b-a)/(m+1.0d0)
dy = (d-c)/(n+1.0d0)
del = (dy/dx)**2
alf = alpha*dy*dy
bet = beta*dy**4
maxi = (lw-il1)/(2*n+6)
maxi = min(maxi,10)
c
call dstart(m,n,alf,bet,f,idf,bda,bdb,bdc,bdd,dx,dy,del)
call dftrnx(m,n,f(2,2),idf,w)
c
if(iflag.eq.3.and.lw.lt.il2) go to 10
if(iflag.le.4) go to 40
if(n .ne.nold) go to 20
if(m .ne.mold) go to 20
if(dx.ne.dxo) go to 20
if(dy.ne.dyo) go to 20
if(w(iwf).ne.wfo.or.w(iwl).ne.wlo) go to 30
if(iflag.le.6) go to 50
if(alfo.eq.alpha.and.beto.eq.beta) go to 50
iflag= iflag-4
write(6,2) iflag,alpha,beta
go to 40
10 iflag= 2
write(6,5) iflag
go to 40
20 iflag= iflag-4
write(6,1) iflag,n,m,a,b,c,d
go to 40
30 iflag= iflag-4
write(6,3) iflag,iwf,iwl,wfo,wlo,w(iwf),w(iwl)
40 nold = n
mold = m
dxo = dx
dyo = dy
alfo = alpha
beto = beta
50 go to (60,70,80,90,100,110,120,130),iflag
go to 1000
60 iwf = max(4*n,3*m)+1
iwl = il1+(n+3)*maxi
iflag= 2
write(6,2) iflag,alpha,beta
70 iwf = max(4*n,3*m)+1
iwl = il1+(n+3)*maxi
go to 200
80 iwf = max(3*m,4*n)+1
iwl = il2
if(alpha.le.0.0d0.and.beta.ge.0.0d0) go to 200
iflag= 4
write(6,2) iflag,alpha,beta
90 iwf = max(3*m,4*n)+1
iwl = il2
go to 200
100 iflag= 2
write(6,2) iflag,alpha,beta
110 go to 200
120 go to 200
130 go to 200
200 call dftrny(m,n,f(2,2),idf,w)
if(iflag.eq.6) go to 250
if(iflag.ne.2) go to 300
i1 = 1
i2 = i1+(n+1)/2
i3 = i2+(n+1)/2
i4 = i3+(n+1)/2
i5 = i4+(n+1)/2
i6 = max((5*m)/2,(7*n)/2)+17
i7 = i6+2*(m+n)
i8 = i7+2*maxi*(n+3)
250 call dbislf(m,n,maxi,iflag,del,tol,alf,bet,itcg,idf,f(2,2),
+ w(i1),w(i2),w(i3),w(i4),w(i5),w(i6),w(i7),w(i8))
if(iflag.lt.0) go to 1000
if(iflag.eq.2) iflag = 6
go to 400
300 if(iflag.eq.7.or.iflag.eq.8) go to 350
i1 = 1
i2 = i1+(n+1)/2
i3 = i2+(n+1)/2
i4 = max((5*m)/2,(7*n)/2)+17
i5 = i4+2*(m+n)
350 call dbisld(m,n,iflag,del,alf,bet,idf,f(2,2),
+ w(i1),w(i2),w(i3),w(i4),w(i5))
if(iflag.lt.0) go to 1000
if(iflag.eq.3) iflag = 7
if(iflag.eq.4) iflag = 8
400 call dftrny(m,n,f(2,2),idf,w)
call dftrnx(m,n,f(2,2),idf,w)
wfo = w(iwf)
wlo = w(iwl)
return
1000 write(6,4) iflag
1 format(1x,'*warning*,iflag changed to ',i3,'n,m,maxi,a,b,c,d=',
+ 2i6,2x,4e12.2)
2 format(1x,'*warning*,iflag changed to ',i3,'alpha,beta=',2e16.6)
3 format(1x,'*warning*,iflag changed to ',i3,/1x,'element no ',
+ i6,' and ',i6,' of w changed from ',2e16.6,' to ',
+ 2e16.6,' by user. ')
4 format(/5x,'***error in dbihar, iflag= ',i6/)
5 format(1x,'*warning*,iflag changed to ',i3,
+ ' workspace needed, given : ',2i8)
return
end