#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file", e.g..  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  aicm.tex doc.txt smmp.f
# Wrapped by douglas@blondie on Mon May 10 15:32:19 1993
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
if test -f 'aicm.tex' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'aicm.tex'\"
else
echo shar: Extracting \"'aicm.tex'\" \(26303 characters\)
sed "s/^X//" >'aicm.tex' <<'END_OF_FILE'
X% Documentation written February, 1988
X%           and updated May, 1992
X%           and updated March - April, 1993
X%
X% To produce a .dvi file, run this file through LaTeX:
X%
X%       latex aicm
X%
X% LaTeX has to be run at least twice in order to get the cross references
X% and citations correct.  Then run the output through your local printing
X% filter, e.g.,
X%
X%       dvips aicm | lpr
X%
X 
X%\documentstyle[12pt,fixup,numinsec]{siam}
X\documentstyle[fixup,numinsec]{siam}
X\pretolerance=800
X\tolerance=10000
X
X% ----------------------------------------------------------------------
X 
X\title{Sparse Matrix Multiplication Package (SMMP)\thanks{%
X%           Draft of April 2, 1993.
X            To appear in Advances in Computational Mathematics,
X            1 (1993), first issue.
X            The algorithms and routines described here were developed
X            while both authors were visiting the Center for Applied
X            Mathematics, Department of Mathematics, Purdue University.
X            }
X      }
X\author{Randolph E. Bank\thanks{University of California at San Diego,
X                Department of Mathematics C012, P. O. Box 109,
X                LaJolla, CA 92093.
X                E-mail:  {\it rbank@ucsd.edu}}
X   \and 
X        Craig C. Douglas\thanks{
X                Department of Computer Science,
X                Yale University,
X                P. O. Box 2158 Yale Station,
X                New Haven, CT 06520-2158
X                and
X                Mathematical Sciences Department,
X                IBM Research Division,
X                Thomas J. Watson Research Center,
X                P. O. Box 218,
X                Yorktown Heights, NY 10598-0218,
X                USA.
X                E-mail:  {\it na.cdouglas@na-net-ornl.gov.}}
X       }
X
X\begin{document}
X\bibliographystyle{siam}
X\maketitle
X 
X% ----------------------------------------------------------------------
X 
X\begin{abstract}
XRoutines callable from FORTRAN and C are described which implement
Xmatrix--matrix multiplication and transposition for a variety of sparse matrix
Xformats.
XConversion routines between various formats are provided.
X\end{abstract}
X
X\begin{keywords}
Xsparse matrices, matrix multiplication, transposition, numerical linear
Xalgebra
X\end{keywords}
X
X\begin{AMSMOS}
X{\bf Numerical Analysis}: Numerical Linear Algebra
X\end{AMSMOS}
X 
X% ----------------------------------------------------------------------
X 
X\section{Introduction}
X\label{Sec:Introduction}
X 
XThe routines described here perform matrix-matrix multiplies, transposes, and
Xformat conversions for sparse matrices.  There are three formats supported.
XThese include the old and new Yale sparse matrix package formats 
X\cite{NewYSMP,YSMP1,YSMP2} and the more
Xefficient one for square matrices advocated by Bank and Smith
X\cite{BankSmith87}.  In each case, only the nonzeros are stored, not the zeros
X(or as few as possible).
X 
XThe principal use of these routines will probably be in implementing parallel
Xcomputation algorithms.  For example, in one variant of parallel multigrid
X\cite{DouglasMiranker87,DouglasSmith88}, as the number of coarse grid problems
Xper level grows, it becomes increasingly difficult to generate the coefficient
Xmatrices based on grids, as a serial multigrid solver does.
X 
XIn \S\ref{Sec:SparseMatrixFormats}, we describe the sparse matrix formats we
Xsupport.  In \S\ref{Sec:CallingSequences}, we describe the structure of the
Xpackage and the calling sequences of each routine.  In \S\ref{Sec:Algorithms},
Xwe describe the algorithms implemented by the package.  Finally, in
X\S\ref{Sec:Source_Code} are instructions for getting a copy of the code.
X 
XThe routines described here differ from those in either SPARSKIT (see
X\cite{Saad90}) or the proposed sparse BLAS (see \cite{DuffMarrRadi92} and
X\cite{Heroux}).
XThere is some overlap with SPARSKIT, but we are more interested in a data
Xformat not supported in SPARSKIT.
XThe sparse BLAS have interfaces for multiplying a sparse matrix by a dense
Xone, but not for multiplying two sparse matrices.
XAlso, our routines are in the public domain with absolutely no restrictions
Xon their use while the others are not.
X
X% ----------------------------------------------------------------------
X 
X\section{Sparse matrix formats}
X\label{Sec:SparseMatrixFormats}
X 
XIn this section, we define three methods for storing a sparse matrix $M$.  Let
X\mbox{$M = D + L + U$}, where $D$ is the main diagonal part of $M$, $L$ is the
Xstrictly lower triangular part of $M$, and $U$ is the strictly upper
Xtriangular part of $M$.  We define the number of nonzeros of $M$ as $NZ(M)$.
X 
XThe old Yale sparse matrix format \cite{YSMP1,YSMP2} requires three vectors:
X\[
X\begin{array}{llcl}\cline{2-2}
XIA:  & \multicolumn{1}{|l|}{IA\hspace*{0.9in}}
X                                & {\rm length} & N + 1\\ \cline{2-2}
X     &                          &              & \\ \cline{2-2}
XJA:  & \multicolumn{1}{|l|}{JA\hspace*{3.5in}}
X                                & {\rm length} & NZ(M)\\ \cline{2-2}
X     &                          & {\rm or}     & NZ(D+U)\\
X     &                          &              & \\ \cline{2-2}
XA:   & \multicolumn{1}{|l|}{M~\hspace*{3.5in}}
X                                & {\rm length} & NZ(M)\\ \cline{2-2}
X     &                          & {\rm or}     & NZ(D+U)
X\end{array}
X\]
X$M$ is stored in row form.  If $M$ is symmetric, the elements of $L$ do
Xnot have to be stored in $A$.  The first element of row $I$ is
X$A(IA(I))$.  The length of row $I$ is determined by $IA(I+1)-IA(I)$,
Xwhich is why $IA$ requires $N+1$ elements instead of the obvious $N$
Xelements.  The column indices of $M$ are stored in the $JA$ vector.
XFor element $A(J)$, its column index is $JA(J)$.  The elements in a
Xrow may be stored in any order.
X 
X     The new Yale sparse matrix format \cite{NewYSMP} requires two vectors:
X\[
X\begin{array}{llcl}\cline{2-2}
XIJA: & \multicolumn{1}{|l|}{IA\hspace{0.6in}~\mid~JA\hspace*{2.4in}}    
X                              & {\rm length} & N+1+NZ(M-D)\\ \cline{2-2}
X     &                        & {\rm or}     & N+1+NZ(U)\\
X     &                        &              & \\ \cline{2-2}
XA:   & \multicolumn{1}{|l|}{D\hspace{0.525in}\mid 0 
X                                            \mid~L~{\rm and}~U\hspace*{2.1in}}    
X                              & {\rm length} & N+1+NZ(M-D)\\ \cline{2-2}
X     &                        & {\rm or}     & N+1+NZ(U)  
X\end{array}
X\]
X$M$ is still stored in row form.  The $IA$-$JA$ vectors of the old
Xformat are combined into a single vector, sometimes referred as
Xan $IJA$ vector.  As before, the first element of row $I$ is
X$A(IJA(I))$.  In this case, the main diagonal of $M$ is separated out
Xfrom the nonzeros of $L$ and $U$.  The diagonal for row $I$ is in $A(I)$.
XThere is a zero after $D$ so that the $JA$ and $L/U$ parts of the $IJA$
Xand $A$ vectors are aligned properly.  Technically, the rows of $A$
Xshould be stored in ascending column order.  However, this is not
Xenforced.
X 
X     The Bank-Smith sparse matrix format \cite{BankSmith87} requires $M$ to be a
Xsquare matrix with a symmetric (or nearly so) zero structure.  It
Xrequires two vectors:
X\[
X\begin{array}{llcl}\cline{2-2}
XIJA: & \multicolumn{1}{|l|}{IA\hspace{0.6in}~\mid~JA\hspace*{2.4in}}    
X                              & {\rm length} & N+1+NZ(U)\\ \cline{2-2}
X     &                        &              & \\ \cline{2-2}
XA:   & \multicolumn{1}{|l|}{D\hspace{0.525in}\mid 0 
X                            \mid~U^T\hspace*{1.0in}\mid~L\hspace*{1.0in}}
X                              & {\rm length} & N+1+NZ(M-D)\\ \cline{2-2}
X     &                        & {\rm or}     & N+1+NZ(U)
X\end{array}
X\]
XWhile $M$ is stored strictly in row form, in a real sense it is
Xstored in both column and row form.  Since we assume that $M$ has a
Xsymmetric zero structure (or $L$ and $U$ are padded by a small number
Xof zeros), we need only store the row indices for $U$ (when $U$ is
Xstored in column form).  These are also the column indices for $L$
X(when $L$ is stored in row form).  However, we store the transpose
Xof $U$ in row form instead of $U$. If $M$ is symmetric, the elements of
X$L$ do not have to be stored in $A$. The first element of column $I$ of
X$U$ is $A(IA(I))$.  The length of column $I$ is determined by 
X$IA(I+1)-IA(I)$, which is why $IA$ requires $N+1$ elements instead of the
Xobvious $N$ elements.  The row indices of $U$ are stored in the $JA$
Xvector.  For element $A(J)$, its row index is $JA(J)$.  The elements
Xin a column must be stored in ascending row order.  We define
X$LSHIFT$ to be 0 if $M$ is symmetric and $IA(N+1)-IA(1)$ if $M$ is
Xnonsymmetric.  The first element of $L$ is $A(IA(1)+LSHIFT)$.  $L$ is
Xstored in row format.  The column index of an element
X$A(IA(I)+J+LSHIFT)$ is $JA(IA(I)+J)$.
X 
X     For all three sparse matrix formats, we can assume there are
Xthree vectors $IA$, $JA$, and $A$ which describe $M$.  Except for the old
XYale sparse matrix format, the vectors $IA$ and $JA$ are really the
Xsame vector $IJA$.  We also need a variable $DIAGA$ which is one if
Xthe diagonal is separated from the rest of the nonzeros and zero
Xotherwise.  Last, we need a variable $SYMA$ which is one if $M$ is
Xstored in a symmetric manner and zero otherwise.
X
X% ----------------------------------------------------------------------
X 
X\section{Calling sequences}
X\label{Sec:CallingSequences}
X 
XIn this section, we describe the five routines which comprise the package.
XThese include two routines to multiply matrices, a routine for the transpose
Xof a matrix, and two routines to convert between various sparse matrix
Xformats.
X 
XFor each routine in this section, the calling sequence assumes distinct $IA$
Xand $JA$ vectors for each matrix.  Suppose a matrix is actually stored using
Xan $IJA$ vector.  Then the routine should be called with $IJA$ as an argument
Xtwice, once for each $IA$ and $JA$.  The $IJA$ vector should not be
Xsubscripted to point to either of the $IA$ or $JA$ parts; the routines will do
Xthis automatically.
X 
XWe multiply two sparse matrices, resulting in a third:
X\begin{center}
X$C\ =\ AB.$
X\end{center}
XMatrix-matrix multiplication is performed in two steps.  These routines only
Xsupport the Yale sparse matrix formats.  First, the nonzero structure of the
Xresulting matrix is determined symbolically in $SYMBMM$:
X\begin{center}
X$\begin{array}{lll}
Xsubroutine & SYMBMM  & (N,M,L,\ IA,JA,DIAGA,\ IB,JB,DIAGB,\\
X           &         & \ IC,JC,DIAGC,\ INDEX)\\
X           &         & \\
X           & integer & N,M,L,\ IA(*),JA(*),DIAGA,\\
X           &         & IB(*),JB(*),DIAGB,\ IC(*),JC(*),DIAGC,\\
X           &         & INDEX(*)
X\end{array}$
X\end{center}
XThe number of rows and columns of the matrices are
X\begin{center}
X$\begin{array}{|c|c|c|}\hline
X{\rm matrix} & {\rm rows} & {\rm columns} \\ \hline
XA            & N          & M\\
XB            & M          & L\\
XC            & N          & L\\ \hline
X\end{array}$
X\end{center}
X$INDEX$ is a scratch vector of length $max\{L,M,N\}$.  It is used to store
Xlinked lists.  The output of $SYMBMM$ is $IC$ and $JC$.  They are dependent on
Xthe value of $DIAGC$.
X 
XOnce the nonzero structure for $C$ is known, the numerical matrix-matrix
Xmultiply is computed in $NUMBMM$:
X\begin{center}
X$\begin{array}{lll}
Xsubroutine & NUMBMM  & (N,M,L,\ IA,JA,DIAGA,A,\ IB,JB,DIAGB,B,\\
X           &         & \ IC,JC,DIAGC,C,\ TEMP)\\
X           &         & \\
X           & integer & N,M,L,\ IA(*),JA(*),DIAGA,\\
X           &         & IB(*),JB(*),DIAGB,\ IC(*),JC(*),DIAGC\\
X           & real    & A(*),\ B(*),\ C(*)
X\end{array}$
X\end{center}
X$TEMP$ is a scratch vector of length $max\{L,M,N\}$.  It is used to
Xstore partial sums.
X 
XWe may also compute the transpose of a matrix, resulting in a second:
X\begin{center}
X$B\ =\ A^{T}.$
X\end{center}
XWe do this operation in $TRANSP$:
X\begin{center}
X$\begin{array}{lll}
Xsubroutine & TRANSP  & (N,M,\ IA,JA,DIAGA,A,\ IB,JB,B,\ MOVE)\\
X           &         & \\
X           & integer & N,M,\ IA(*),JA(*),DIAGA,\\
X           &         & IB(*),JB(*),\ MOVE\\
X           & real    & A(*),\ B(*)
X\end{array}$
X\end{center}
XThe number of rows and columns of the matrices are
X\begin{center}
X$\begin{array}{|c|c|c|}\hline
X{\rm matrix} & {\rm rows} & {\rm columns} \\ \hline
XA            & N          & M\\
XB            & M          & N\\ \hline
X\end{array}$
X\end{center}
XWe assume that $B$ will use the same diagonal storage method that is used for
X$A$.  We do not actually move the elements of $A$ into $B$ unless $MOVE$ is
Xone.
X 
XFinally, we have two routines for converting between one of the Yale formats
Xand the Bank-Smith format.  This only makes sense when the matrices are
Xsquare.  The routine $YTOBS$ will convert a Yale format sparse matrix into the
XBank-Smith format:
X\begin{center}
X$\begin{array}{lll}
Xsubroutine & YTOBS   & (N,\ IA,JA,DIAGA,SYMA,A,\ IB,JB,B,\ MOVE)\\
X           &         & \\
X           & integer & N,\ IA(*),JA(*),DIAGA,SYMA,\\
X           &         & IB(*),JB(*),\ MOVE\\
X           & real    & A(*),\ B(*)
X\end{array}$
X\end{center}
XBy definition, $DIAGB$ must be one.  Hence, we do not need it as an argument.
XWe determine whether or not $B$ should be stored in a symmetric or
Xnonsymmetric manner from $SYMA$.  We do not actually move the elements of $A$
Xinto $B$ unless $MOVE$ is one.
X 
XThe routine $BSTOY$ will convert a Bank-Smith format sparse matrix into one of
Xthe Yale formats:
X\begin{center}
X$\begin{array}{lll}
Xsubroutine & BSTOY   & (N,\ IA,JA,SYMA,A,\ IB,JB,DIAGB,B,\ MOVE)\\
X           &         & \\
X           & integer & N,\ IA(*),JA(*),SYMA,\\
X           &         & IB(*),JB(*),DIAGB,\ MOVE\\
X           & real    & A(*),\ B(*)
X\end{array}$
X\end{center}
XWe determine which of the two formats by the value of $DIAGB$.  We determine
Xwhether or not $B$ should be stored in a symmetric or nonsymmetric manner from
X$SYMA$.  We do not actually move the elements of $A$ into $B$ unless $MOVE$ is
Xone.
X
X% ----------------------------------------------------------------------
X 
X\section{Algorithms}
X\label{Sec:Algorithms}
X 
XIn this section, we describe the algorithms for $SYMBMM$, $NUMBMM$,
Xand $TRANSP$.  We use a metalanguage rather than real code.  One of
Xthe facets of these algorithms is their ability to work well with
Xmatrices in a variety of formats.
X 
X\subsection{SYMBMM}
X 
XInitialization consists of setting up the first row pointer and clearing all
Xof the links (contained in $INDEX$):
X\begin{quote}\samepage
X 1\hspace*{ 4em}$  {\bf do}\ i\in\{1,\cdots,max\{l,m,n\}\}\ \{ $ \\
X 2\hspace*{ 8em}$      index_{i}\ =\ 0 $ \\
X 3\hspace*{ 8em}$      \} $ \\
X 4\hspace*{ 4em}$  {\bf if}\ (diagc\ ==\ 0)\ \{ $ \\
X 5\hspace*{ 8em}$      ic_{1}\ =\ 1 $ \\
X 6\hspace*{ 8em}$      \} $ \\
X 7\hspace*{ 4em}$  {\bf else}\ \{ $ \\
X 8\hspace*{ 8em}$      ic_{1}\ =\ n+2 $ \\
X 9\hspace*{ 8em}$      \} $
X\end{quote}
X$INDEX$ is used to store links.  If an entry in $INDEX$ is nonzero, it is
Xa pointer to the next column with a nonzero.  The links are determined as
Xthey are found, and are unordered.
X
XThe main loop consists of three components:  initialization, a long loop that
Xmerges row lists, and code to copy the links into the $JC$ vector.  The
Xinitialization part is as follows:
X\begin{quote}
X10\hspace*{ 4em}$ {\bf do}\ i\in\{1,\cdots,n\}\ \{ $ \\
X11\hspace*{ 8em}$     istart\ =\ -1 $ \\
X12\hspace*{ 8em}$     length\ =\ 0 $
X\end{quote}
XThe start column ($istart$) is reset and the number of column entries for the
X$i$-th row is assumed empty.  The loop to merge row lists is as follows:
X\begin{quote}
X13\hspace*{ 8em}$     {\bf do}\ jj\in\{ia_{i},\cdots,ia_{i+1}\}\ \{ $ \\
X14\hspace*{12em}$         {\bf if}\ (jj\ ==\ ia_{i+1})\ \{ $ \\
X15\hspace*{16em}$             {\bf if}\ (diaga\ ==\ 0\ | $ \\
X  \hspace*{19em}$                        i\ >\ min\{m,n\})\ \{ $ \\
X16\hspace*{20em}$                 {\bf next}\ jj $ \\
X17\hspace*{20em}$                 \} $ \\
X18\hspace*{16em}$             j\ =\ i $ \\
X19\hspace*{16em}$             \} $ \\
X20\hspace*{12em}$         {\bf else}\ \{ $ \\
X21\hspace*{16em}$             j\ =\ ja_{jj} $ \\
X22\hspace*{16em}$             \} $ \\
X23\hspace*{12em}$         {\bf if}\ (index_{j}\ ==\ 0\ \&\ diagb\ ==\ 1\ \& $ \\
X  \hspace*{15em}$                    j\ \leq\ min\{l,m\})\ \{ $ \\
X24\hspace*{16em}$             index_{j}\ =\ istart $ \\
X25\hspace*{16em}$             istart\ =\ j $ \\
X26\hspace*{16em}$             length\ =\ length + 1 $ \\
X27\hspace*{16em}$             \} $ \\
X28\hspace*{12em}$         {\bf do}\ k\in\{ib_{j},\cdots,ib_{j+1}-1\}\ \{ $ \\
X29\hspace*{16em}$             {\bf if}\ (index_{jb_{k}}\ ==\ 0)\ \{ $ \\
X30\hspace*{20em}$                 index_{jb_{k}}\ =\ istart $ \\
X31\hspace*{20em}$                 istart\ =\ jb_{k} $ \\
X32\hspace*{20em}$                 length\ =\ length + 1 $ \\
X33\hspace*{20em}$                 \} $ \\
X34\hspace*{16em}$             \}\ (\rm end\ of\ k\ loop) $ \\
X35\hspace*{12em}$         \}\ (\rm end\ of\ jj\ loop) $
X\end{quote}
XLines 14-22 determine if the $jj$ loop has to execute an ``extra'' iteration
Xwhen $A$ is stored in the new Yale sparse matrix format.  Lines 23-27 add
Xcolumn $j$ to the linked list.  Lines 28-34 determine the intersection of this
Xrow i with the nonzeros in column j of $B$.  Finally, we copy the links into
Xthe $JC$ vector as the column indices:
X\begin{quote}
X36\hspace*{ 8em}$     {\bf if}\ (diagc\ ==\ 1\ \&\ index_{i}\ \neq\ 0)\ \{ $ \\
X37\hspace*{12em}$         length\ =\ length - 1 $ \\
X38\hspace*{12em}$         \} $ \\
X39\hspace*{ 8em}$     ic_{i+1}\ =\ ic_{i} + length $ \\
X40\hspace*{ 8em}$     {\bf do}\ j\in\{ic_{i},\cdots,ic_{i+1}-1\}\ \{ $ \\
X41\hspace*{12em}$         {\bf if}\ (diagc\ ==\ 1\ \&\ istart\ ==\ i)\ \{ $ \\
X42\hspace*{16em}$             istart\ =\ index_{istart} $ \\
X43\hspace*{16em}$             index_{i}\ =\ 0 $ \\
X44\hspace*{16em}$             \} $ \\
X45\hspace*{12em}$         jc_{j}\ =\ istart $ \\
X46\hspace*{12em}$         istart\ =\ index_{istart} $ \\
X47\hspace*{12em}$         index_{jc_{j}}\ =\ 0 $ \\
X48\hspace*{12em}$         \}\ (\rm end\ of\ j\ loop)  $ \\
X49\hspace*{ 8em}$     index_{i}\ =\ 0 $ \\
X50\hspace*{ 8em}$     \}\ (\rm end\ of\ i\ loop) $
X\end{quote}
XLines 36-38 remove the diagonal element from the row if $C$ is stored in the
Xnew Yale sparse matrix format.  Note that in lines 43 and 47 the nonzero links
Xare cleared.  Due to the small number of links (with respect to $N$), it would
Xbe extremely inefficient to clear the entire vector.  The resulting vectors
X$IC$ and $JC$ contain the nonzero structure of \mbox{$C\ =\ AB$}.
X 
X\subsection{NUMBMM}
X 
XInitialization consists of clearing all of the partial sums:
X\begin{quote}\samepage
X 1\hspace*{ 4em}$ {\bf do}\ i\in\{1,\cdots,max\{l,m,n\}\}\ \{ $ \\
X 2\hspace*{ 8em}$     temp_{i}\ =\ 0 $ \\
X 3\hspace*{ 8em}$     \} $
X\end{quote}     
X
XThe main loop forms the partial sums and then copies the completed sums into
Xthe correct locations in the sparse matrix structure:
X\begin{quote}\samepage
X 4\hspace*{ 4em}$ {\bf do}\ i\in\{1,\cdots,n\}\ \{ $ \\
X 5\hspace*{ 8em}$      {\bf do}\ jj\in\{ia_{i},\cdots,ia_{i+1}\}\ \{ $ \\
X 6\hspace*{12em}$         {\bf if}\ (jj == ia_{i+1})\ \{ $ \\
X 7\hspace*{16em}$             {\bf if}\ (diaga\ ==\ 0\ | $\\
X  \hspace*{18em}$                        i\ >\ min\{m,n\})\ \{ $ \\
X 8\hspace*{20em}$                 {\bf next}\ jj $ \\
X 9\hspace*{20em}$                 \} $ \\
X10\hspace*{16em}$             j\ =\ i $ \\
X11\hspace*{16em}$             ajj\ =\ a_{i} $ \\
X12\hspace*{16em}$             \} $ \\
X13\hspace*{12em}$         {\bf else}\ \{ $ \\
X14\hspace*{16em}$             j\ =\ ja_{jj} $ \\
X15\hspace*{16em}$             ajj\ =\ a_{jj} $ \\
X16\hspace*{16em}$             \} $ \\
X17\hspace*{12em}$         {\bf if}\ (diagb\ ==\ 1\ \&\ j \leq\ min\{l,m\})\ \{ $ \\
X18\hspace*{16em}$             temp_{j}\ =\ temp_{j} + ajj * b_{j} $ \\
X19\hspace*{16em}$             \} $ \\
X20\hspace*{12em}$         {\bf do}\ k\in\{ib_{j},\cdots,ib_{j+1}-1\}\ \{ $ \\
X21\hspace*{16em}$             temp_{jb_{k}}\ =\ temp_{jb_{k}} + ajj * b_{k} $ \\
X22\hspace*{16em}$             \}\ (\rm end\ of\ k\ loop) $ \\
X23\hspace*{12em}$         \}\ (\rm end\ of\ jj\ loop) $ \\
X24\hspace*{ 8em}$     {\bf if}\ (diagc\ ==\ 1 \ \&\ i \leq\ min\{l,n\})\ \{ $ \\
X25\hspace*{12em}$         c_{i}\ =\ temp_{i} $ \\
X26\hspace*{12em}$         temp_{i}\ =\ 0 $ \\
X27\hspace*{12em}$         \} $ \\
X28\hspace*{ 8em}$     {\bf do}\  j\in\{ic_{i},\cdots,ic_{i+1}-1\}\ \{ $ \\
X29\hspace*{12em}$         c_{j}\ =\ temp_{jc_{j}} $ \\
X30\hspace*{12em}$         temp_{jc_{j}}\ =\ 0. $ \\
X31\hspace*{12em}$         \}\ (\rm end\ of\ j\ loop) $ \\
X32\hspace*{ 8em}$     \}\ (\rm end\ of\ i\ loop) $
X\end{quote}     
XLines 6-16 determine if the $jj$ loop has to execute an ``extra'' iteration
Xwhen $A$ is stored in the new Yale sparse matrix format.  Lines 20-22
Xaccumulate the product for row $j$, and store it in lines 28-31.  Lines 17-19
Xand 24-27 deal with special cases when a matrix is stored in the new Yale
Xsparse matrix format.  The resulting vector $C$ contains the numerical product
X$AB$.
X 
X\subsection{TRANSP}
X 
XWe begin by constructing $ib$.  This requires setting up the first row
Xpointer and counting indices for each column:
X\begin{quote}\samepage
X 1\hspace*{ 4em}$ {\rm do}\ i\in\{1,\cdots,m+1\}\ \{ $ \\
X 2\hspace*{ 8em}$     ib_{i}\ =\ 0 $ \\
X 3\hspace*{ 8em}$     \} $ \\
X 4\hspace*{ 4em}$ {\rm if}\ (move\ ==\ 1)\ \{ $ \\
X 5\hspace*{ 8em}$     {\rm do}\ i\in\{1,\cdots,m+1\}\ \{ $ \\
X 6\hspace*{12em}$         b_{i}\ =\ 0 $ \\
X 7\hspace*{12em}$         \} $ \\
X 8\hspace*{ 8em}$     \} $ \\
X 9\hspace*{ 4em}$ {\rm if}\ (diaga\ ==\ 1)\ \{ $ \\
X10\hspace*{ 8em}$     ib_{1}\ =\ m + 2 $ \\
X11\hspace*{ 8em}$     \} $ \\
X12\hspace*{ 4em}$ {\rm else}\ \{ $ \\
X13\hspace*{ 8em}$     ib_{1}\ =\ 1 $ \\
X14\hspace*{ 8em}$     \} $ \\
X15\hspace*{ 4em}$ {\rm do}\ i\in\{1,\cdots,n\}\ \{ $ \\
X16\hspace*{ 8em}$     {\rm do}\ j\in\{ia_{i},\cdots,ia_{i+1}-1\} $ \\
X17\hspace*{12em}$         ib_{ja_{j}+1}\ =\ ib_{ja{j}+1}+1 $ \\
X18\hspace*{12em}$         \} $ \\
X19\hspace*{ 8em}$     \} $ \\
X20\hspace*{ 4em}$ {\rm do}\ i\in\{1,\cdots,m\}\ \{ $ \\
X21\hspace*{ 8em}$    ib_{i+1}\ =\ ib_{i}+ib_{i+1} $ \\
X22\hspace*{ 8em}$    \} $
X\end{quote}     
XLines 1-3 clear $IB$.  If we are constructing $B$ at the same time, then lines
X4-8 clear the main diagonal of $B$.  Lines 9-14 determine where the rows of
X$B$ are stored.  Lines 15-18 count the number of indices in each column and
Xlines 20-22 converts this information into row pointers.
X
XNext, we construct $jb$:
X\begin{quote}\samepage
X23\hspace*{ 4em}$ {\rm do}\ i\in\{1,\cdots,n\}\ \{ $ \\
X24\hspace*{ 8em}$     {\rm do}\ j\in\{ia_{i},\cdots,ia_{i+1}-1\}\ \{ $ \\
X25\hspace*{12em}$         jj\ =\ ja_{j} $ \\
X26\hspace*{12em}$         jb_{ib_{jj}}\ =\ i $ \\
X27\hspace*{12em}$         {\rm if}\ (move\ ==\ 1)\ \} $ \\
X28\hspace*{16em}$             b_{ib_{jj}}\ =\ a_{j} $ \\
X29\hspace*{16em}$             \} $ \\
X30\hspace*{12em}$         ib_{jj}\ =\ ib_{jj}+1 $ \\
X31\hspace*{12em}$         \} $ \\
X32\hspace*{ 8em}$     \} $
X\end{quote}     
XLines 23-32 put $i$ as a column index into row $JA(j)$ in the first possible
Xposition (pointed to by $IB(jj)$) and increment the pointer.  If we are
Xconstructing $B$ at the same time, then lines 27-29 do the copy.
X
XFinally, we have to restore $IB$:
X\begin{quote}\samepage
X33\hspace*{ 4em}$ {\rm do}\ i\in\{m,m-1,\cdots,2\}\ \{ $ \\
X34\hspace*{ 8em}$     ib_{i}\ =\ ib_{i-1} $ \\
X35\hspace*{ 8em}$     \} $ \\
X36\hspace*{ 4em}$ {\rm if}\ (diaga\ ==\ 1)\ \{ $ \\
X37\hspace*{ 8em}$     {\rm if}\ (move\ ==\ 1)\ \{ $ \\
X38\hspace*{12em}$         j\ =\ min(n,m) $ \\
X39\hspace*{12em}$         {\rm do}\ i\in\{1,j\}\ \{ $ \\
X40\hspace*{16em}$             b_{i}\ =\ a_{i} $ \\
X41\hspace*{16em}$             \} $ \\
X42\hspace*{12em}$         \} $ \\
X43\hspace*{ 8em}$     ib_{1}\ =\ m + 2 $ \\
X44\hspace*{ 8em}$     \} $ \\
X45\hspace*{ 4em}$ {\rm else}\ \{ $ \\
X46\hspace*{ 8em}$     ib_{1}\ =\ 1 $ \\
X47\hspace*{ 8em}$     \} $
X\end{quote}     
XLines 34, 43, and 46 do the real work in restoring $IB$.  Lines 36-42 finish
Xcopying the main diagonal of $A$ when it is stored in the new Yale sparse
Xmatrix format.
X 
X% ----------------------------------------------------------------------
X 
X\section{Fortran source code}
X\label{Sec:Source_Code}
X 
XThe Fortran source code for this package is freely available from Netlib
Xas the file linalg/smmp.shar.
X
X
X% ----------------------------------------------------------------------
X 
X%\newpage
X
X\begin{thebibliography}{99}
X 
X\bibitem{BankSmith87}
X  R. E. Bank and R. K. Smith,
X  {\em General sparse elimination requires no permanent integer storage},
X  SIAM J. Sci. Stat. Comp., 8 (1987), pp. 574--584.
X 
X\bibitem{DouglasMiranker87}
X  C. C. Douglas and W. L. Miranker,
X  {\em Constructive interference in parallel algorithms},
X  SIAM J. Numer. Anal., 25 (1988), pp. 376--398.
X 
X\bibitem{DouglasSmith88}
X  C. C. Douglas and B. F. Smith,
X  {\em Using symmetries and antisymmetries to analyze a parallel
X       multigrid algorithm: the elliptic boundary value case}
X  SIAM J. Numer. Anal., 26 (1989), pp. 1439--1461.
X 
X\bibitem{DuffMarrRadi92}
X  I. Duff, M. Marrone, and G. Radicati,
X  {\em A proposal for user level sparse {BLAS}},
X  in preparation.
X
X\bibitem{NewYSMP}
X  S. C. Eisenstat and H. C. Elman and M. H. Schultz and A. H. Sherman,
X  {\em The (new) Yale sparse matrix package },
X  in Elliptic Problem Solvers II, G. Birkhoff and A. Schoenstadt, editors,
X  Academic Press, New York, 1984, pp. 45--52.
X
X\bibitem{YSMP1}
X  S. C. Eisenstat and M. C. Gursky and M. H. Schultz and A. H. Sherman,
X  {\em Yale sparse matrix package I:  the symmetric codes},
X  Int. J. Numer. Methods in Engin., 18 (1982), pp. 1145-1151.
X
X\bibitem{YSMP2}
X  \leavevmode\vrule height 2pt depth -1.6pt width 23pt, 
X  {\em Yale sparse matrix package II:  the nonsymmetric codes},
X  Research Report 114, Department of Computer Science, Yale University,
X  New Haven, CT, 1977.
X
X\bibitem{Heroux}
X  M. A. Heroux,
X  {\em  Proposal for a sparse {BLAS} toolkit},
X  in preparation.
X
X\bibitem{Saad90}
X  Y. Saad,
X  {\em {SPARSKIT}: a basic tool kit for sparse matrix computations},
X  preliminary version, 1990.
X  Available by anonymous ftp from riacs.edu.
X\end{thebibliography}
X 
X% ----------------------------------------------------------------------
X
X\end{document}
END_OF_FILE
if test 26303 -ne `wc -c <'aicm.tex'`; then
    echo shar: \"'aicm.tex'\" unpacked with wrong size!
fi
# end of 'aicm.tex'
fi
if test -f 'doc.txt' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'doc.txt'\"
else
echo shar: Extracting \"'doc.txt'\" \(16929 characters\)
sed "s/^X//" >'doc.txt' <<'END_OF_FILE'
XSparse Matrix Multiply Package (SMMP)
X
X
XRandolph E. Bank
XUniversity of California at San Diego
XDepartment of Mathematics C012
XP.O. Box 109
XLaJolla, CA 92093
X
XCraig C. Douglas
XIBM Research Division
XIBM T. J. Watson Research Center
XP.O. Box 218
XYorktown Heights, NY 10598
X
X
XAbstract:  Routines callable from FORTRAN and C are described
Xwhich implement matrix-matrix multiplication and transposition
Xfor a variety of sparse matrix formats.  Conversion routines
Xbetween the various formats are provided.
X
X
X
X1.  Introduction
X
X     These routines will perform matrix-matrix multiplies,
Xtransposes, and format conversions for sparse matrices.  There
Xare three formats supported.  These include the old and new Yale
Xsparse matrix package formats and the more efficient one for
Xsquare matrices advocated by Bank and Smith.  In each case, only
Xthe nonzeros are stored, not the zeros (or as few as possible).
X
X     These routines were written while both authors were visitors
Xto the Applied Mathematics Center in the Department of
XMathematics, Purdue University.  While they were developed using
Xthe UNIX(tm) f77 compiler on SUN workstations, they should be
Xportable to any compiler with a real FORTRAN-77 compiler.
X
X     In section 2, we describe the sparse matrix formats this
Xpackage supports.  In section 3, we describe the structure of the
Xpackage and the calling sequences of each routine.   In section
X4, we describe the algorithms implemented by the package.
X
X
X
X2.  Sparse Matrix Formats
X
X     Assume that a matrix M = D + L + U, where D is the main
Xdiagonal part of M, L is the strictly lower triangular part of M,
Xand U is the strictly upper triangular part of M.  We define the
Xnumber of nonzeros of M as NZ(M).
X
X     The old Yale sparse matrix format requires three vectors:
X         _______________
X        |               |
X    IA: | IA            | length N + 1
X        |_______________|
X
X         ___________________________________
X        |                                   |
X    JA: | JA                                | length NZ(M) or
X        |___________________________________|        NZ(D+U)
X
X         ___________________________________
X        |                                   |
X     A: | M                                 | length NZ(M) or
X        |___________________________________|        NZ(D+U)
X
XM is stored in row form.  If M is symmetric, the elements of L do
Xnot have to be stored in A.  The first element of row I is
XA(IA(I)).  The length of row I is determined by IA(I+1) - IA(I),
Xwhich is why IA requires N+1 elements instead of the obvious N
Xelements.  The column indices of M are stored in the JA vector.
XFor element A(J), its column index is JA(J).  The elements in a
Xrow may be stored in any order.
X
X     The new Yale sparse matrix format requires two vectors:
X         ___________________________________
X        |               |                   |
X   IJA: | IA            | JA                | length N + 1 + NZ(M) or
X        |_______________|___________________|        N + 1 + NZ(D+U)
X
X         ___________________________________
X        |             | |                   |
X     A: | D           |0| L and U           | length N + 1 + NZ(A) or
X        |_____________|_|___________________|        N + 1 + NZ(D+U)
X
XM is still stored in row form.  The IA-JA vectors of the old
Xformat are combined into a single vector, sometimes referred as
Xan IJA vector.  As before, the first element of row I is
XA(IJA(I)).  In this case, the main diagonal of M is separated out
Xfrom the nonzeros of L and U.  The diagonal for row I is in A(I).
XThere is a zero after D so that the JA and L/U parts of the IJA
Xand A vectors are aligned properly.  Technically, the rows of A
Xshould be stored in ascending column order.  However, this is not
Xenforced.
X
X     The Bank-Smith sparse matrix format requires M to be a
Xsquare matrix with a symmetric (or nearly so) zero structure.  It
Xrequires two vectors:
X         _________________________          
X        |               |         |          
X   IJA: | IA            | JA      |           length N + 1 + NZ(U)
X        |_______________|_________|          
X
X         ___________________________________
X        |             | |  t      |         |
X     A: | D           |0| U       | L       | length N + 1 + NZ(A) or
X        |_____________|_|_________|_________|        N + 1 + NZ(D+U)
X
XWhile M is stored strictly in row form, in a real sense it is
Xstored in both column and row form.  Since we assume that M has a
Xsymmetric zero structure (or L and U are padded by a small number
Xof zeros), we need only store the row indices for U (when U is
Xstored in column form).  These are also the column indices for L
X(when L is stored in row form).  However, we store the transpose
Xof U in row form instead of U. If M is symmetric, the elements of
XL do not have to be stored in A. The first element of column I of
XU is A(IA(I)).  The length of column I is determined by IA(I+1) -
XIA(I), which is why IA requires N+1 elements instead of the
Xobvious N elements.  The row indices of U are stored in the JA
Xvector.  For element A(J), its row index is JA(J).  The elements
Xin a column must be stored in ascending row order.  We define
XLSHIFT to be 0 if M is symmetric and IA(N+1)-IA(1) if M is
Xnonsymmetric.  The first element of L is A(IA(1)+LSHIFT).  L is
Xstored in row format.  The column index of an element
XA(IA(I)+J+LSHIFT) is JA(IA(I)+J).
X
X     For all three sparse matrix formats, we can assume there are
Xthree vectors IA, JA, and A which describe M.  Except for the old
XYale sparse matrix format, the vectors IA and JA are really the
Xsame vector IJA.  We also need a variable DIAGA which is one if
Xthe diagonal is separated from the rest of the nonzeros and zero
Xotherwise.  Last, we need a variable SYMA which is one if M is
Xstored in a symmetric manner and zero otherwise.
X
X
X
X3.  Calling Sequences
X
X     In this section, we describe the five routines which
Xcomprise the package.  These include two routines to multiply
Xmatrices, a routine for the transpose of a matrix, and two
Xroutines to convert between various sparse matrix formats.
X
X     For each routine in this section, the calling sequence
Xassumes distinct IA and JA vectors for each matrix.  Suppose a
Xmatrix is actually stored using an IJA vector.  Then the routine
Xshould be called with IJA as an argument twice, once for each IA
Xand JA.  The IJA vector should not be subscripted to point to
Xeither of the IA or JA parts; the routines will do this
Xautomatically.
X
X     We multiply two sparse matrices, resulting in a third:
X
X        C = AB.
X
XMatrix-matrix multiplication is performed in two steps.  These
Xroutines only support the Yale sparse matrix formats.  First, the
Xnonzero structure of the resulting matrix is determined
Xsymbolically in SYMBMM:
X
X        subroutine SYMBMM (N,M,L, IA,JA,DIAGA, IB,JB,DIAGB,
X                           IC,JC,DIAGC, INDEX)
X
X            integer  N,M,L, IA(*),JA(*),DIAGA,
X                     IB(*),JB(*),DIAGB, IC(*),JC(*),DIAGC,
X                     INDEX(*)
X
XThe number of rows and columns of the matrices are
X
X                rows     columns
X             -------------------
X            |         |         |
X          A |    N    |    M    |
X            |         |         |
X          B |    M    |    L    |
X            |         |         |
X          C |    N    |    L    |
X            |         |         |
X             -------------------
X
XINDEX is a scratch vector of length MAX(L,M,N).  It is used to
Xstore linked lists.  The output of SYMBMM is IC and JC.  They are
Xdependent on the value of DIAGC.
X
X     Once the nonzero structure for C is known, the numerical
Xmatrix-matrix multiply is computed in NUMBMM:
X
X        subroutine NUMBMM (N,M,L, IA,JA,DIAGA,A, IB,JB,DIAGB,B,
X                           IC,JC,DIAGC,C, TEMP)
X
X            integer  N,M,L, IA(*),JA(*),DIAGA,
X                     IB(*),JB(*),DIAGB, IC(*),JC(*),DIAGC
X            real     A(*), B(*), C(*)
X
XTEMP is a scratch vector of length MAX(L,M,N).  It is used to
Xstore partial sums.
X
X     We may also compute the transpose of a matrix, resulting in
Xa second:
X
X             t
X        B = A .
X
XWe do this operation in TRANSP:
X
X        TRANSP (N,M, IA,JA,DIAGA,A, IB,JB,B, MOVE)
X
X            integer  N,M, IA(*),JA(*),DIAGA,
X                     IB(*),JB(*), MOVE
X            real     A(*), B(*)
X
XThe number of rows and columns of the matrices are
X
X                rows     columns
X             -------------------
X            |         |         |
X          A |    N    |    M    |
X            |         |         |
X          B |    M    |    N    |
X            |         |         |
X             -------------------
X
XWe assume that B will use the same diagonal storage method that
Xis used for A.  We do not actually move the elements of A into B
Xunless MOVE is one.
X
X     Finally, we have two routines for converting between one of
Xthe Yale formats and the Bank-Smith format.  This only makes
Xsense when the matrices are square.  The routine YTOBS will
Xconvert a Yale format sparse matrix into the Bank-Smith format:
X
X        YTOBS (N, IA,JA,DIAGA,SYMA,A, IB,JB,B, MOVE)
X
X            integer  N, IA(*),JA(*),DIAGA,SYMA,
X                     IB(*),JB(*), MOVE
X            real     A(*), B(*)
X
XBy definition, DIAGB must be one.  Hence, we do not need it as an
Xargument.  We determine whether or not B should be stored in a
Xsymmetric or nonsymmetric manner from SYMA.  We do not actually
Xmove the elements of A into B unless MOVE is one.
X
X     The routine BSTOY will convert a Bank-Smith format sparse
Xmatrix into one of the Yale formats:
X
X        BSTOY (N, IA,JA,SYMA,A, IB,JB,DIAGB,B, MOVE)
X
X            integer  N, IA(*),JA(*),SYMA,
X                     IB(*),JB(*),DIAGB, MOVE
X            real     A(*), B(*)
X
XWe determine which of the two formats by the value of DIAGB.  We
Xdetermine whether or not B should be stored in a symmetric or
Xnonsymmetric manner from SYMA.  We do not actually move the
Xelements of A into B unless MOVE is one.
X
X
X
X4. Algorithms
X
X     In this section, we describe the algorithms actually
Ximplemented.  We do not show the complete details, but show in a
Xmetalanguage how to do the operations when the matrices are
Xprovided in the old Yale sparse matrix format.
X
X
X4.1  SYMBMM
X
X     Initialization consists of setting up the first row pointer
Xand clearing all of the links:
X
X        ic(1) = 1
X        do i=1,n {
X            index(i) = 0
X            }
X 
XThe main loop consists of three components:  reseting the start
Xcolumn and assuming a zero number of column entries, merging the
Xrow lists, and copying the links into the JC vector:
X
X        do i = 1,n {
X            istart = -1                 <--- initialization
X            length = 0
X            do jj = ia(i),ia(i+1)-1 {   <--- merge row lists
X                j = ja(jj)
X                do k = ib(j),ib(j+1)-1 {
X                    if (index(jb(k)) == 0) {
X                        index(jb(k)) = istart
X                        istart = jb(k)
X                        length = length + 1
X                        }
X                    }
X                }
X            ic(i+1) = ic(i) + length
X            do j = ic(i),ic(i+1)-1 {    <--- row i of jc
X                jc(j) = istart
X                istart = index(istart)
X                index(jc(j)) = 0
X                }
X            }
X
XNote that in the last loop, only the nonzero entries are cleared.
XDue to the small number of entries (with respect to N), it would
Xbe extremely inefficient to clear the entire vector.  The
Xresulting vectors IC and JC contain the nonzero structure of
XC = AB.
X
X
X4.2  NUMBMM
X
X     Initialization consists of setting up the first row pointer
Xand clearing all of the partial sums:
X
X        ic(1) = 1
X        do i=1,n {
X            temp(i) = 0.
X            }
X 
XThe main loop forms the partial sums and then copies the completed
Xsums into the correct locations in the sparse matrix structure:
X 
X        do i = 1,n {
X             do jj = ia(i),ia(i+1)-1 {
X                j = ja(jj)
X                ajj = a(jj)
X                do 20 k = ib(j),ib(j+1)-1 {
X                    temp(jb(k)) = temp(jb(k)) + ajj * b(k)
X                    }
X                }
X            do j = ic(i),ic(i+1)-1 {
X                c(j) = temp(jc(j))
X                temp(jc(j)) = 0.
X                }
X            }
XThe resulting vector C contains the numerical product AB.
X
X
X4.3  TRANSP
X
X     This routine consists of three parts.  We begin by counting
Xindices for each column in order to construct IB:
X
X        do i=2,m+1 {
X            ib(i) = 0
X            }
X        ib(1)=1
X        do i = 1,n { 
X            do j = ia(i),ia(i+1)-1
X                ib(ja(j)+1) = ib(ja(j)+1)+1
X                }
X            }
X        do i = 1,m {
X           ib(i+1) = ib(i)+ib(i+1)
X           }
X 
XNext, we construct JB:
X 
X        do i = 1,n { 
X            do j = ia(i),ia(i+1)-1 {
X                index = ja(j)
X                jb(ib(index)) = i
X                if (move == 1) b(ib(index)) = a(j)
X                ib(index) = ib(index)+1
X                }
X            }
X 
XFinally, we fix up IB:
X 
X        do i=m,2,-1 {
X            ib(i)=ib(i-1)
X            }
X        ib(1)=1
X
XThe resulting vectors IB and JB contain the nonzero structure of
XB = A-transpose.
X
X
X4.4  YTOBS
X
X     This routine consists of six parts.  We begin by
Xestimating how many elements are in each column of the upper
Xtriangular part of the matrix:
X
X        do i = 1,n {
X            ib(i+1) = ia(i+1)-ia(i)
X            }
X 
XNext, we look for upper triangular entries and duplicate entries.
XWe modify JA in order not to search twice for items.
X 
X        do i = 1,n {
X            do 4jj = ia(i),ia(i+1)-1{
X                j = ja(jj)
X                if (j > i) {
X                    ib(i+1) = ib(i+1)-1
X                    ib(j+1) = ib(j+1)+1
X                    do k = ia(j),ia(j+1)-1 {    <--- check for
X                        if (ja(k) == i) {       <--- duplicates
X                            ib(j+1) = ib(j+1)-1
X                            ja(jj) = -j
X                            k = ia(j+1)         <--- break
X                            }    
X                        }    
X                    }    
X                }
X            }
X
X 
XNext, we compute IB:
X 
X        ib(1) = 1
X        do i = 1,n {
X            ib(i+1) = ib(i+1) + ib(i)
X            }
X 
XWe now initialize B:
X 
X        if (move == 1) {
X            do ii = 1,ib(n+1)+lshift-1 {
X                b(ii) = 0.
X                }
X            }
X
XWe now compute JB and restore JA:
X 
X        do i = 1,n {
X            do jj = ia(i),ia(i+1)-1 {
X                j = ja(jj)
X                if (j > i) {
X                    jb(ib(j)) = i
X                    if (move == 1) {
X                        b(ib(j)) = a(jj)
X                        }
X                    ib(j) = ib(j) + 1
X                    }
X                else {
X                    if (j <= 0) {
X                        ja(jj) = -j
X                        if (move == 1 & i == -j) {
X                            b(ib(i)) = a(jj)
X                            }
X                        }
X                    else {
X                        jb(ib(i)) = j
X                        if (move == 1) {
X                            b(ib(i)) = a(jj)
X                            }
X                        ib(i) = ib(i) + 1
X                        }
X                    }
X                }
X            }
X 
XFinally, we fix up IB:
X 
X        do i=n,2,-1 {
X            ib(i) = ib(i-1)
X            }
X        ib(1)=1
X
XThe resulting vectors IB and JB contain the nonzero structure of
XA in the Bank-Smith sparse format.  In addition, the nonzeros of
XB are copied if so requested.
X
X
X4.5  BSTOY
X
X     This routine consists of four parts.  We begin by
Xestimating how many elements are in each column of the matrix:
X
X        ib(1) = 1
X        do i = 1,n {
X            ib(i+1) = ia(i+1) - ia(i) + 1
X            }
X        do i = 1,n {
X            do j = ia(i),ia(i+1)-1 {
X                ib(ja(j)+1) = ib(ja(j)+1) + 1
X                }
X            }
X        do i = 1,n {
X            ib(i+1) = ib(i+1) + ib(i)
X            }
X
XNow deal with the diagonal elements:
X
X        do i = 1,n {
X            jb(ib(i)) = i
X            if (move == 1) {
X                b(ib(i)) = a(i)
X                }
X            ib(i) = ib(i) + 1
X            }
X 
XNow compute JB:
X 
X        do i = 1,n {
X            do jj = ia(i),ia(i+1)-1{
X                j = ja(jj)
X                jb(ib(j)) = i
X                jb(ib(i)) = j
X                if (move == 1) {
X                    b(ib(j)) = a(jj)
X                    b(ib(i)) = a(jj)
X                    }
X                ib(i) = ib(i) + 1
X                ib(j) = ib(j) + 1
X                }
X            }
X 
XFinally, we fix up IB:
X 
X        do i=n,2,-1 {
X            ib(i) = ib(i-1)
X            }
X        ib(1) = 1
X
XThe resulting vectors IB and JB contain the nonzero structure of
XA in the old Yale sparse matrix package format.  In addition, the
Xnonzeros of B are copied if so requested.
X
END_OF_FILE
if test 16929 -ne `wc -c <'doc.txt'`; then
    echo shar: \"'doc.txt'\" unpacked with wrong size!
fi
# end of 'doc.txt'
fi
if test -f 'smmp.f' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'smmp.f'\"
else
echo shar: Extracting \"'smmp.f'\" \(9695 characters\)
sed "s/^X//" >'smmp.f' <<'END_OF_FILE'
Xc=======================================================================
Xc Sparse Matrix Multiplication Package
Xc
Xc Randolph E. Bank and Craig C. Douglas
Xc
Xc na.bank@na-net.ornl.gov and na.cdouglas@na-net.ornl.gov
Xc
Xc Compile this with the following command (or a similar one):
Xc
Xc     f77 -c -O smmp.f
Xc
Xc=======================================================================
X        subroutine symbmm 
X     *                   (n, m, l, 
X     *                    ia, ja, diaga, 
X     *                    ib, jb, diagb,
X     *                    ic, jc, diagc,
X     *                    index)
Xc
X            integer       ia(*), ja(*), diaga,
X     *                    ib(*), jb(*), diagb,
X     *                    ic(*), jc(*), diagc,
X     *                    index(*)
Xc
Xc       symbolic matrix multiply c=a*b
Xc
X        maxlmn = max(l,m,n)
X        do 10 i=1,maxlmn
X   10       index(i)=0
X        if (diagc.eq.0) then
X            ic(1)=1
X        else
X            ic(1)=n+2
X        endif
X        minlm = min(l,m)
X        minmn = min(m,n)
Xc
Xc    main loop
Xc
X        do 50 i=1,n
X            istart=-1
X            length=0
Xc
Xc    merge row lists
Xc
X            do 30 jj=ia(i),ia(i+1)
Xc    a = d + ...
X                if (jj.eq.ia(i+1)) then
X                    if (diaga.eq.0 .or. i.gt.minmn) goto 30
X                    j = i
X                else
X                    j=ja(jj)
X                endif
Xc    b = d + ...
X                if (index(j).eq.0 .and. diagb.eq.1 .and. j.le.minlm)then
X                    index(j)=istart
X                    istart=j
X                    length=length+1
X                endif
X                do 20 k=ib(j),ib(j+1)-1 
X                    if(index(jb(k)).eq.0) then
X                        index(jb(k))=istart
X                        istart=jb(k)
X                        length=length+1
X                    endif
X   20           continue
X   30       continue
Xc
Xc   row i of jc
Xc
X            if (diagc.eq.1 .and. index(i).ne.0) length = length - 1
X            ic(i+1)=ic(i)+length
X            do 40 j= ic(i),ic(i+1)-1
X                if (diagc.eq.1 .and. istart.eq.i) then
X                    istart = index(istart)
X                    index(i) = 0
X                endif
X                jc(j)=istart
X                istart=index(istart)
X                index(jc(j))=0
X   40       continue
X            index(i) = 0
X   50   continue
X        return
X        end
X        subroutine numbmm 
X     *                   (n, m, l,
X     *                    ia, ja, diaga, a,
X     *                    ib, jb, diagb, b,
X     *                    ic, jc, diagc, c,
X     *                    temp)
Xc
X            integer       ia(*), ja(*), diaga,
X     *                    ib(*), jb(*), diagb,
X     *                    ic(*), jc(*), diagc 
Xc
X            real          a(*), b(*), c(*), temp(*)
Xc
Xc       numeric matrix multiply c=a*b
Xc
X        maxlmn = max(l,m,n)
X        do 10 i = 1,maxlmn
X 10         temp(i) = 0.
X        minlm = min(l,m)
X        minln = min(l,n)
X        minmn = min(m,n)
Xc
Xc   c = a*b
Xc
X        do 50 i = 1,n
X             do 30 jj = ia(i),ia(i+1)
Xc    a = d + ...
X                if (jj.eq.ia(i+1)) then
X                    if (diaga.eq.0 .or. i.gt.minmn) goto 30
X                    j = i
X                    ajj = a(i)
X                else
X                    j=ja(jj)
X                    ajj = a(jj)
X                endif
Xc    b = d + ...
X                if (diagb.eq.1 .and. j.le.minlm) 
X     *              temp(j) = temp(j) + ajj * b(j)
X                do 20 k = ib(j),ib(j+1)-1
X 20                 temp(jb(k)) = temp(jb(k)) + ajj * b(k)
X 30         continue
Xc    c = d + ...
X            if (diagc.eq.1 .and. i.le.minln) then
X                c(i) = temp(i)
X                temp(i) = 0.
X            endif
X            do 40 j = ic(i),ic(i+1)-1
X                c(j) = temp(jc(j))
X 40             temp(jc(j)) = 0.
X 50     continue
X        return
X        end
X        subroutine transp
X     *                   (n, m,
X     *                    ia, ja, diaga, a,
X     *                    ib, jb,        b,
X     *                    move)
Xc
X            integer       ia(*), ja(*), diaga,
X     *                    ib(*), jb(*),
X     *                    move  
Xc
X            real          a(*), b(*)
Xc
Xc       compute b = a(transpose)
Xc
Xc       first make ib
Xc
X        do 10 i=1,m+1
X   10       ib(i)=0
X        if (move.eq.1) then
X            do 15 i =1,m+1
X   15           b(i) = 0.
X        endif
X        if (diaga.eq.1) then
X            ib(1)=m + 2
X        else
X            ib(1)=1
X        endif
Xc
Xc       count indices for each column 
Xc
X        do 30 i=1,n   
X            do 20 j=ia(i),ia(i+1)-1
X                ib(ja(j)+1)=ib(ja(j)+1)+1
X   20       continue
X   30   continue
X        do 40 i=1,m
X   40      ib(i+1)=ib(i)+ib(i+1)
Xc
Xc       now make jb
Xc
X        do 60 i=1,n   
X            do 50 j=ia(i),ia(i+1)-1
X                index=ja(j)
X                jb(ib(index))=i
X                if (move.eq.1) b(ib(index)) = a(j)
X                ib(index)=ib(index)+1
X   50       continue
X   60   continue
Xc
Xc       now fixup ib 
Xc
X        do 70 i=m,2,-1
X   70       ib(i)=ib(i-1)
X        if (diaga.eq.1) then
X            if (move.eq.1) then
X                j = min(n,m)
X                do 80 i = 1,j
X   80               b(i) = a(i)
X            endif
X            ib(1)=m + 2
X        else
X            ib(1)=1
X        endif
X        return
X        end
X        subroutine ytobs
X     *                  (n,
X     *                   ia, ja, diaga, syma, a,
X     *                   ib, jb,              b,
X     *                   move)
Xc
X            integer     ia(*), ja(*), diaga, syma,
X     *                  ib(*), jb(*), move
Xc
X            real        a(*), b(*)
Xc
Xc       create the bank-smith data structures b from the
Xc       corresponding yale data structures a
Xc
X        do 10 i=1,n
X   10       ib(i+1)=ia(i+1)-ia(i)
Xc
Xc       look for upper triangular entries and duplicate entries
Xc
X        do 50 i=1,n
X            do 40 jj=ia(i),ia(i+1)-1
X                j=ja(jj)
X                if (i.eq.j) then
X                    ib(i+1)=ib(i+1)-1
X                    ja(jj) = -j
X                endif
X                if(j.gt.i) then
X                    ib(i+1)=ib(i+1)-1
X                    ib(j+1)=ib(j+1)+1
Xc
Xc       check for duplicates
Xc
X                    do 20 k=ia(j),ia(j+1)-1
X                        if(ja(k).eq.i) then
X                            ib(j+1)=ib(j+1)-1
X                            ja(jj)=-j
X                            go to 30
X                        endif
X   20               continue
X   30               continue
X                endif
X   40       continue
X   50   continue
Xc
Xc       compute ib
Xc
X        ib(1)=n + 2
X        do 60 i=1,n
X   60       ib(i+1)=ib(i+1)+ib(i)
Xc
Xc       initialize b if move = 1
Xc
X        if (move.eq.1) then
X            lshift = 0
X            if (syma.eq.0) lshift = ib(n+1) - ib(1)
X            do 62 ii = 1,ib(n+1)+lshift-1
X   62           b(ii) = 0.
X            if (diaga.eq.1) then
X                do 64 ii = 1,n
X   64               b(ii) = a(ii)
X            endif
X        endif
Xc
Xc       compute jb
Xc
X        do 80 i=1,n
X            do 70 jj=ia(i),ia(i+1)-1
X                j=ja(jj)
X                if(j.gt.i) then
X                    jb(ib(j))=i
X                    if (move.eq.1) b(ib(j)) = a(jj)
X                    ib(j)=ib(j)+1
X                else
X                    if(j.le.0) then
X                        ja(jj)=-j
X                        if (move.eq.1 .and. i.eq.-j) b(i) = a(jj)
X                    else
X                        jb(ib(i))=j
X                        if (move.eq.1) b(ib(i)+lshift) = a(jj)
X                        ib(i)=ib(i)+1
X                    endif
X                endif
X   70       continue
X   80   continue
Xc
Xc       fixup ib
Xc
X        do 90 i=n,2,-1
X   90       ib(i)=ib(i-1)
X        ib(1)=n + 2
X        return
X        end
X        subroutine bstoy
X     *                  (n,
X     *                   ia, ja,        syma, a,
X     *                   ib, jb, diagb,       b,
X     *                   move)
Xc
X            integer      ia(*), ja(*),        syma,
X     *                   ib(*), jb(*), diagb, 
X     *                   move
Xc
X            real         a(*), b(*)
Xc
Xc       create the yale data structures b from the
Xc       corresponding bank-smith data structures a
Xc
Xc       compute ib
Xc
X        if (diagb.eq.1) then
X            ib(1) = n + 2
X            icor = 0
X            if (move.eq.1) then
X                lshift = 0
X                if (syma.eq.0) lshift = ia(n+1) - ia(1)
X                do 2 i = 1,n
X    2               b(i) = a(i)
X            endif
X        else
X            ib(1) = 1
X            icor = 1
X        endif
X        do 10 i=1,n
X   10       ib(i+1)=ia(i+1)-ia(i)+icor
X        do 30 i=1,n
X            do 20 j=ia(i),ia(i+1)-1
X                ib(ja(j)+1)=ib(ja(j)+1)+1
X   20       continue
X   30   continue
Xc
X        do 40 i=1,n
X   40       ib(i+1)=ib(i+1)+ib(i)
X        if (diagb.eq.0) then
X            do 45 i = 1,n
X                jb(ib(i)) = i
X                if (move.eq.1) b(ib(i)) = a(i)
X   45           ib(i) = ib(i) + 1
X        endif
Xc
Xc       now compute jb
Xc
X        do 60 i=1,n
X            do 50 jj=ia(i),ia(i+1)-1
X                j = ja(jj)
X                jb(ib(j))=i
X                jb(ib(i))=j
X                if (move.eq.1) then
X                    b(ib(j)) = a(jj)
X                    b(ib(i)) = a(jj+lshift)
X                endif
X                ib(i)=ib(i)+1
X                ib(j)=ib(j)+1
X   50       continue
X   60   continue
Xc
Xc       fixup ib
Xc
X        do 70 i=n,2,-1
X   70       ib(i)=ib(i-1)
X        if (diagb.eq.1) then
X            ib(1)=n+2
X        else
X            ib(1)=1
X        endif
X        return
X        end
END_OF_FILE
if test 9695 -ne `wc -c <'smmp.f'`; then
    echo shar: \"'smmp.f'\" unpacked with wrong size!
fi
# end of 'smmp.f'
fi
echo shar: End of shell archive.
exit 0