#!/bin/sh
# This is a shell archive, meaning:
# 1. Remove everything above the #!/bin/sh line.
# 2. Save the resulting text in a file.
# 3. Execute the file with /bin/sh (not csh) to create the files:
#	mpsim
# This archive created: Thu Feb  9 09:07:05 1989
# By:	Tom Dunigan 576-2522 ()
# From: dunigan@msr.EPM.ORNL.GOV (Tom Dunigan 576-2522)
export PATH; PATH=/bin:$PATH
if test ! -d 'mpsim'
then
	mkdir 'mpsim'
fi
cd 'mpsim'
if test ! -d 'src'
then
	mkdir 'src'
fi
cd 'src'
if test -f 'CHANGES'
then
	echo shar: over-writing existing file "'CHANGES'"
fi
cat << \SHAR_EOF > 'CHANGES'
8/22/86 make pause sigpause for 4.2ish machines so will work with SUN

8/1/87  fix DATLTH waste;  add System V conditionals
	makefile.sysv userin.c simlib.c mpsim.c

10/7/87	v1.2, add mypid() and global send  probemsg
	userin.c seqftn.f ftnsubs.c simlib.c drive.c mpsim.man

3/22/88 v 1.3 fix global send problem in drive.c

8/2/88	v2.0  iPSC/2 stuff, from new version of smpsim
	     logfile no longer append
SHAR_EOF
if test -f 'README'
then
	echo shar: over-writing existing file "'README'"
fi
cat << \SHAR_EOF > 'README'
		mpsim     portable hypercube simulator
		   Tom Dunigan,  ORNL

mpsim consists of a a simulator driver (mpsim) and a library (simlib.a)
for building host and node programs to simulate Intel hypercube
on Intel 310 xenix, SUN, pc AT xenix, Unix 4.2, and Sequents.
Uses forks and pipes, creating subprocesses under unix for each node
and the host  (much like the Intel xsim simulator).  Limited to
d3 cube on xenix (or d4 if xenix is configured properly).  4.2 supports
d4's.   nstats and ccplot (from other ORNL simulation package)
can be used to analyze trace file.

You should be able to build your cube/host programs with no changes.
Use  x8000 as the "node id" for the host process.

Sample script sbld is included to show how build simulator programs.
Then just   say mpsim  and type  ? to its prompt and it will
show you how to "load" the host and node programs and start the
simulation.
To create mpsim and simlib.a just do a make with makefile for
the appropriate machine.

There is a version for Sequent that uses shared memory rather
than pipes.

Present documentation is only a manual entry -- mpsim.man
and technical report "A Portable Hypercube Simulator", Dunigan, ORNL/TM-10410
SHAR_EOF
if test -f 'makefile'
then
	echo shar: over-writing existing file "'makefile'"
fi
cat << \SHAR_EOF > 'makefile'
SIMSRC = mpsim.c drive.c userin.c
SIMOBJ = mpsim.o drive.o userin.o
LIBSRC = simlib.c ftnsubs.c
LIBOBJ = simlib.o ftnsubs.o

CFLAGS = -O -DBCOPY

all: mpsim simlib

mpsim: $(SIMOBJ)
	cc  -o mpsim $(SIMOBJ)

$(SIMOBJ): sim.h simlib.h

simlib: $(LIBOBJ)
	ar cr simlib.a $(LIBOBJ)
	ranlib simlib.a

$(LIBOBJ): simlib.h

shar: 
	shar CHANGES README makefile* sbld* sfbld* *.man *.h *.c *.f > tmp.shar
SHAR_EOF
if test -f 'makefile.seq'
then
	echo shar: over-writing existing file "'makefile.seq'"
fi
cat << \SHAR_EOF > 'makefile.seq'
# seq make  -P8
SIMSRC = mpsim.c drive.c userin.c
SIMOBJ = mpsim.o drive.o userin.o
LIBSRC = simlib.c ftnsubs.c seqftn.f
LIBOBJ = simlib.o ftnsubs.o seqftn.o

CFLAGS = -O -DBCOPY
FFLAGS = -e

all: & mpsim simlib

mpsim: & $(SIMOBJ)
	cc  -o mpsim $(SIMOBJ)

$(SIMOBJ): sim.h simlib.h

simlib: & $(LIBOBJ)
	ar cr simlib.a $(LIBOBJ)
	ranlib simlib.a

$(LIBOBJ): simlib.h
SHAR_EOF
if test -f 'makefile.sysv'
then
	echo shar: over-writing existing file "'makefile.sysv'"
fi
cat << \SHAR_EOF > 'makefile.sysv'
# SYS V, Release 2.0 for PC/AT.
SIMSRC = mpsim.c drive.c userin.c
SIMOBJ = mpsim.o drive.o userin.o
LIBSRC = simlib.c
LIBOBJ = simlib.o

CFLAGS = -Ml -DSYSV

all: mpsim simlib 

mpsim: $(SIMOBJ)
	cc -Ml -o mpsim $(SIMOBJ)

$(SIMOBJ): sim.h simlib.h

simlib: $(LIBOBJ)
	ar cr simlib.a $(LIBOBJ)

$(LIBOBJ): simlib.h

SHAR_EOF
if test -f 'makefile.sysv3'
then
	echo shar: over-writing existing file "'makefile.sysv3'"
fi
cat << \SHAR_EOF > 'makefile.sysv3'
# SYS V, Release 2.0 for PC/AT.
SIMSRC = mpsim.c drive.c userin.c
SIMOBJ = mpsim.o drive.o userin.o
LIBSRC = simlib.c ftnsubs.c
LIBOBJ = simlib.o ftnsubs.o

CFLAGS =  -DSYSV

all: mpsim simlib 

mpsim: $(SIMOBJ)
	cc -o mpsim $(SIMOBJ)

$(SIMOBJ): sim.h simlib.h

simlib: $(LIBOBJ)
	ar cr simlib.a $(LIBOBJ)

$(LIBOBJ): simlib.h

SHAR_EOF
if test -f 'makefile.vax'
then
	echo shar: over-writing existing file "'makefile.vax'"
fi
cat << \SHAR_EOF > 'makefile.vax'
SIMSRC = mpsim.c drive.c userin.c
SIMOBJ = mpsim.o drive.o userin.o
LIBSRC = simlib.c ftnsubs.c
LIBOBJ = simlib.o ftnsubs.o

CFLAGS = -O -DBCOPY

all: mpsim simlib

mpsim: $(SIMOBJ)
	cc  -o mpsim $(SIMOBJ)

$(SIMOBJ): sim.h simlib.h

simlib: $(LIBOBJ)
	ar cr simlib.a $(LIBOBJ)
	ranlib simlib.a

$(LIBOBJ): simlib.h

shar: 
	shar README makefile* sbld* sfbld* *.man *.h *.c *.f > tmp.shar
SHAR_EOF
if test -f 'makefile.xenix'
then
	echo shar: over-writing existing file "'makefile.xenix'"
fi
cat << \SHAR_EOF > 'makefile.xenix'
SIMSRC = mpsim.c drive.c userin.c
SIMOBJ = mpsim.o drive.o userin.o
LIBSRC = simlib.c
LIBOBJ = simlib.o

CFLAGS = -Ml -DXENIX

all: mpsim simlib

mpsim: $(SIMOBJ)
	cc -Ml -o mpsim $(SIMOBJ) -lx

$(SIMOBJ): sim.h simlib.h

simlib: $(LIBOBJ)
	ar cr simlib.a $(LIBOBJ)
	ranlib simlib.a

$(LIBOBJ): simlib.h
SHAR_EOF
if test -f 'sbld'
then
	echo shar: over-writing existing file "'sbld'"
fi
cat << \SHAR_EOF > 'sbld'
cc -o $1  $1.c src/simlib.a -lm
SHAR_EOF
chmod +x 'sbld'
if test -f 'sbld.xenix'
then
	echo shar: over-writing existing file "'sbld.xenix'"
fi
cat << \SHAR_EOF > 'sbld.xenix'
cc -o $1 -Ml $1.c simlib.o -lm -lx
SHAR_EOF
chmod +x 'sbld.xenix'
if test -f 'sfbld'
then
	echo shar: over-writing existing file "'sfbld'"
fi
cat << \SHAR_EOF > 'sfbld'
fortran -e -o $1  $1.f src/simlib.a
SHAR_EOF
chmod +x 'sfbld'
if test -f 'sfbld.seq'
then
	echo shar: over-writing existing file "'sfbld.seq'"
fi
cat << \SHAR_EOF > 'sfbld.seq'
fortran -e -o $1  $1.f src/simlib.a
SHAR_EOF
chmod +x 'sfbld.seq'
if test -f 'sfbld.vax'
then
	echo shar: over-writing existing file "'sfbld.vax'"
fi
cat << \SHAR_EOF > 'sfbld.vax'
f77 -o $1  $1.f src/simlib.a
SHAR_EOF
chmod +x 'sfbld.vax'
if test -f 'sfbld.xenix'
then
	echo shar: over-writing existing file "'sfbld.xenix'"
fi
cat << \SHAR_EOF > 'sfbld.xenix'
echo Fortran compile and link with ornl ipsc simulator.
ftn286 $1.f
bnd286 -xc -ss '"stack(+1000H)"' -ep -oj $1 -- \
	$1.obj				\
	/usr/local/simlib.lib   \
	/usr/intel/lib/osisl.obj	\
	/usr/intel/lib/cel287.lib	\
	/usr/intel/lib/f286r?.lib	\
	/usr/intel/lib/80287.lib	\
	/usr/intel/lib/xudil.lib	\
	/usr/intel/lib/xnxudi.lib	\
	/usr/ipsc/lib/sim/xsimlib.lib	\
	/usr/intel/lib/Llibc.lib	\
	/usr/intel/lib/Llibx.lib
SHAR_EOF
chmod +x 'sfbld.xenix'
if test -f 'mpsim.man'
then
	echo shar: over-writing existing file "'mpsim.man'"
fi
cat << \SHAR_EOF > 'mpsim.man'
.TH mpsim L "2 August 1988  v2.0"
.UC 4
.SH NAME
mpsim - portable message-passing multiprocessor simulator
.SH SYNOPSIS
.I mpsim
commands:
.nf
.sp 1
\f3c [filename]\f1			disable logging or log to \f2filename\f3
.br
h host\f1				load host with file \f2host\f3
.br
\f3l [-c dim] [-n nodeno] [-p pid] node\f1
.br
				load \f2nodeno\f1 node or all nodes of a
				 cube of dimension \f2dim\f1 with file
				 \f2node
.br
\f3q\f1				quit
.br
\f3s\f1				start simulation
.br
\f3t [on]\f1				enable or disable trace
\f3u file\f1                            use file for command input
.fi
.sp 2
The host and node programs should be linked with the library
.I simlib.a
which contains the following subroutines:
.ft 3
.nf

int copen(int pid);
void cclose();

void send(int d, int type, char *msg, int msglth,
	int dstnode, int dstpid);
void sendw(int d, int type, char *msg, int msglth,
	int dstnode, int dstpid);
void sendmsg(int d, int type, char *msg, int msglth,
	int dstnode, int dstpid);

void recv(int d, int type, char *msg, int maxlth,
	int *msglth, int *srcnode, int *srcpid);
void recvw(int d, int type, char *msg, int maxlth,
	int *msglth, int *srcnode, int *srcpid);
void recvmsg(int d, int *type, char *msg, int maxlth,
	int *msglth, int *srcnode, int *srcpid);

int probe(int d, type);
int probemsg(int pid);
int status(int d);

int mynode();
int mypid();
int cubedim();
int clock();
void syslog(int pid, char *msg);
void flick();

void setpid(int pid);
void crecv(int type, int *msg, int maxlth);
int irecv(int type, int *msg, int maxlth);
void csend(int type, int *msg, int msglth, int dstnode, int dstpid);
int isend(int type, int *msg, int msglth, int dstnode, int dstpid);
int myhost();
int mclock();
int infocount();
int infonode();
int infopid();
int infotype();
int msgdone(int id);
int msgwait(int id);
int iprobe(int type);
.fi
.ft 1
.SH DESCRIPTION
A simulator driver task,
.I mpsim,
provides a set of commands for loading and controlling a
set of hypercube application programs built with the
simulator library
.I simlib.a.
The simulator can be used to execute programs developed
for the Intel hypercubes (iPSC/1 or iPSC/2), and a trace file is provided to
assist in debugging or performance analysis.
The simulator uses the UNIX fork and pipe facilities
and has been tested on a Intel 310 with XENIX, an IBM PC/AT
with XENIX,
an Intel 301 with System V 3.0, a DEC VAX with UNIX 4.2,
an Encore, and a Sequent Balance 8000
with DYNIX.
Limited testing has been done on System V.
Since the simulation will spawn multiple processes, you may wish
to use
.I nice(1)
to reduce system load.
Note that simulation on the Sequent or Encore will utilize multiple processors.
The simulation does not provide the detailed timings, large number 
of processors, or
message-passing model of the simulator described in
.I intel(l),
but does permit porting of programs between the Intel cube
and the simulator with no source code changes.
In addition, there are no restrictions on the use of COMMON
or global variables in C.
The programmer compiles and links his host and node programs
with 
.I simlib.a
and then runs
.I mpsim
to load the application programs into the simulator cube and
start the simulation.
.PP
The 
.I c
commmand (cubelog) turns off logging with no arguments, or if
a file name is provided, enables logging to the given file.
The 
.I h
command (host) specifies an executable file that will be the
application host program.
The
.I l
command (load -- you may substitute
.I m
for
.I l)
loads the simulator nodes with the program
.I node.
A specific node may be loaded with the
.I -n
option, and a specifici process id (\f2pid\f1) may be assigned
with
.I -p
(The default pid is 0.  The host program must use \f2setpid()\f1
for version 2 subroutines.)
.I Dim
specifies the dimension of the cube and must be in the range 0-4.
(On some XENIX systems the maximum dimension may be 3.)
The
.I t
command (trace) enables or disables tracing of simulator events
\f2send\f1's and \f2recv\f1's.
Once all application programs are "loaded" the 
.I s
command (start) will start the simulation, you can exit with the
.I q
command (quit).
You may place commands for \f2mpsim\f1 in a file and use the
.I u
command to instruct \f2mpsim\f1 to read the file for command input.
If all processes exit then the simulator will exit, otherwise
it will be necessary to use  CTRL-C to terminate the simulation.
A sample session might be
.ft 2
.sp 1
 	cc -o host host.c simlib.a -lm
 	cc -o node node.c simlib.a -lm
 	mpsim
 	  t on
 	  l -c 3 node
 	  h host
 	  s
.ft 1
.PP
The functions provided in
.I simlib.a
mirror those described in the Intel
.I 
iPSC User's Guide.
.ft 1
The programmer must first establish one or more cube communication
channel data structures with calls to
.I copen.
.I copen
takes a value to be used as process identifier and
returns a descriptor, of type int, that is used in subsequent
message-passing functions.
A process is addressed by its node number and process identifier.
.I cclose
frees the channel data structure.
.PP
.I send
and
.I sendw
send the message pointed to by msg to the process at node dstnode
with process id dstpid.
The type and size of the message (msglth)  in bytes are also provided.
.I send
returns immediately, but one cannot use the message area until
.I status
returns FREE (0), indicating that the kernel has sent the message.
.I sendw
does not return until the message has been sent.  (This does not
imply that the message has been received.)
.I sendmsg
behaves exactly like 
.I sendw
but is intended as the host version for compatibilty with INTEL cube.
If \f2dstnode\f1 is -1, then the message is sent to all other nodes
in the cube.
One may send to the other nodes of a dimension-d subcube
containing the sending node
by setting \f2dstnode\f1 to \f2d - cubedim() - 1\f1.
.PP
.I recv
and
.I recvw
await the arrival of a message of the given type for the node
and process id associated with int d.
The functions provide addresses to store the message, the actual
length of the message, and the node and process id of the sender.
For
.I recvw
the process blocks until a message of the given type arrives.
For
.I recv
the process may continue processing after issuing the
.I recv,
and when 
.I status
returns a value of FREE (0), then a message of the given type has arrived.
Upon receipt of the message, the simulator sets the srcnode and srcpid to
those of the sender, sets the msglth to the length of the
received message, and copies the message into msg.
No more than maxlth bytes are copied.
Messages are handled in a FIFO fashion.
.I recvmsg
behaves like
.I recvw
except it does not discriminate on message type, rather the type
of the message is returned along with the message in accordance with
the INTEL cube.
.I sendmsg
and
.I recvmsg
are intended (by INTEL) to be used only by the host processes,
but the simulator permits node usage as well.
.PP
.I probe
determines if a message of the given type is available
for the node and process id associated with the given int.
If a message is available,
.I probe
returns the length of the message;
otherwise, a value of -1 is returned.
One must issue a 
.I recv
actually to fetch the message.
Note, the channel data structure should not be in use
by other message-passing functions.
On the host,
.I probemsg
determines if a message for the given process id is available.
If a message is available, a value of 1 is returned;
otherwise, a value of 0 returned.
If the process id is -1, then 1 is returned if a message
is waiting on the host for any process id.
.I status
returns a value of BUSY (1) or FREE (0) indicating whether the given
int data structure is in use or not.
For
.I send,
BUSY indicates that the kernel has not yet sent the message.
For
.I recv,
BUSY indicates that the desired message has not arrived.
.PP
.I mynode
returns the node number of the process.
The host has a node number of x8000 (32768).
.I mypid
presently returns 0.
The dynamic loader functions are not supported by the simulator.
.I cubedim
returns the dimension of the cube.
.I clock
returns the present value of the time-of-day clock in milliseconds.
.I syslog
places the given message and pid in the trace file.
.I flick
relinquishes control from the given process to other runnable
processes.
.I flick
is usually used in busy-wait conditions with
.I status
following a
.I recv 
or with
.I probe.
.PP
Note that 
.I mpsim
uses four file descriptors (logical unit numbers) in managing
the simulation through UNIX pipes.
The file descriptors used (and not available to your application)
are 10, 11, 12, and 13.
.SH VERSION 2 SUBROUTINES
The second generation of Intel hypercubes (iPSC/2) uses a slightly different
set of message passing routines.
The new routines do not require \f2copen\f1.
Information returned by \f2recvw\f1 are now returned by the \f2info\f1
functions.
.SH POST PROCESSORS
If tracing has been enabled
then a trace file is produced with simulator data that can be
summarized by 
.I nstats
or
.I ccplot.
.I nstats tracefile
will produce a per-node summary of compute time and sends and receives.
.I ccplot tracefile > plotfile
will produce a plotfile that can be plotted with various plotting
programs such as
.I graph(1).
.SH FORTRAN
The message passing subroutines may also be called from 
.I f77
programs.
.SH FILES
The following files are provided; the actual location is
site dependent.
.br
simlib.a	simulator subroutines
.br
mpsim		simulator driver
.br
sbld		sample C build script
.br
sfbld		sample f77 build script
.SH SEE ALSO
ppsim(l), smpsim(l) shared-memory version for Sequent,
hep(l), intel(l), Intel's
.I
iPSC User's Guide
and "A Portable Hypercube Simulator", Dunigan, ORNL/TM-10410.
A version of the simulator (dcube) is available that uses a
networked set of UNIX engines.
.SH BUGS
The delay in message passing is due to delays in UNIX pipes
and does not reflect a hypercube structure.
Messages are passed through the simulator driving task
.I mpsim.
The clock used is just the computer's time of day clock
and has a resolution of only 20 milliseconds.
Intel's notion of running multiple processes on a single 
hypercube node is not supported.
The
.I handler
function and the Intel dynamic loader 
functions are not presently implemented.
For the version 2 subroutines, the maximum message size is 256,000 bytes.
The typesel field in crecv() supports only wildcard (-1) and specific
type; the type mask is not currently supported.
.SH AUTHOR
T. H. Dunigan ORNL
SHAR_EOF
if test -f 'sim.h'
then
	echo shar: over-writing existing file "'sim.h'"
fi
cat << \SHAR_EOF > 'sim.h'
/* sim.h		thd  8/2/88
 *  simulator definitions
 */

#define MAXNODES 16
#define MAXDIM 4
#define FLTH 40
#define LOGNM "SIMLOG"
#define WAIT 0

struct NodeTable {
	struct MsgHdr *nt_recvq; /* pointer to messages waiting to be recvd */
				  /* must be first in list */
	int nt_upid;		/* unix pid */
	int nt_mynode;          /* FUTURE  dynamic load node number */
	int nt_mypid;           /* pid from load or setpid */
	char nt_filnam[FLTH];	/* load module file name */
	int nt_state;		/* waiting */
	int nt_type, nt_pid;	/* type and pid waiting for */
	char *nt_bldptr;	/* msg assembly/dis ptr */
				/* in senders struct pointing to recvers q*/
} node[MAXNODES+1];

int running;
int CubDim;
int trace;
int debug;
char logfile[FLTH];
int logging;
FILE *lf;
long clock(), sttime;
SHAR_EOF
if test -f 'simlib.h'
then
	echo shar: over-writing existing file "'simlib.h'"
fi
cat << \SHAR_EOF > 'simlib.h'
/* simlib.h		thd 8/2/88  */

  /*  we use 3 pipes
   *	MSG	r/w by both child and sim controlled by pause/signal
   *	REQ	written by all children, read by sim -- ASSUMES the
   *		two byte message is written contiguous in pipe
   *	ACK	written by child read by sim in puase/signal sequence
   */
  /* pipe fds */
#define MSGIN	10
#define MSGOUT	11
#define	ACK	12
#define REQ	13
#define REQOUT	14
#define ACKOUT	15

#define HOST 0x8000
#define FREE 0
#define ANY -1
#define BUSY 1
#define UNUSED -1
#define TRUE 1
#define FALSE 0
#define MAXLTH 256000
#define MAXPID 32767

  /*  request types */
#define DIM 1
#define SEND 2
#define RECV 3
#define RECVW 4
#define PROBE 5
#define STAT 6
#define STATW 7
#define SYSLOG 8
#define MSGWAIT 9
#define CPROBE 10
#define LWAIT 11
#define LWAITA 12
#define PROBEMSG 13
#define GETCUBE 14
#define IPROBE 15
#define IRECV 16
#define ISEND 17
#define RELCUBE 18
#define CRECV 19
#define LKILL 20
#define LOAD 21

#define DATLTH 1000
#define MAGIC -3571
  /* message header and recvq entry */
struct MsgHdr {
	struct MsgHdr *mh_next;	/* recvq chain (must be first in struct) */
	int mh_magic;		/* qa */
	int mh_req;		/* request type */
	int mh_upid;		/* unix pid */
	int mh_me;		/* my node id */
	int mh_mepid;		/* my pid when i send */
	int mh_node;		/* node to or from */
	int mh_pid;		/* from pid */
	int mh_type;		/* msg type */
	int mh_rcvlth;		/* max lth on recv */
	int mh_lth;		/* data field lth */
	char mh_data[DATLTH];	/* data*/
};
#define HDRLTH sizeof(struct MsgHdr)-DATLTH
SHAR_EOF
if test -f 'drive.c'
then
	echo shar: over-writing existing file "'drive.c'"
fi
cat << \SHAR_EOF > 'drive.c'
/* drive.c		thd    8/2/88
 *   main control loop
 *   read pipe looking for requests and perform
 */
#include <stdio.h>
#include <signal.h>
#include "sim.h"
#include "simlib.h"
struct MsgHdr *prev;  /* retainer for srchq */
struct MsgHdr *malloc();
char *reqnm[];

drive(){
	struct MsgHdr *m, *mp, *srchq();
	int n, req, lth, i, j, tfnd, nd, *ip, rcving;
	int d, waking, gsend;
	long ticks;

	m = malloc(HDRLTH+MAXLTH);  /* sim msg recv area */
	 if(m==NULL){
		fprintf(stderr,"mpsim init malloc failed\n");
		finish();
	}
	ip = (int *)m->mh_data;
	if (trace && logging){
		fprintf(lf,"mpsim trace start clock 0\n");
	  	fprintf(lf,"cnt\t%d\tclock\t%ld\n",running,0);
	}
	sttime = clock() - 20;

	for(;;){		/* loop forever */
	  ticks = clock() - sttime;
	  for(i=0;i <= MAXNODES; i++)  /* see who has died */
	   if (node[i].nt_state != UNUSED && kill(node[i].nt_upid,0) == -1){
			running--;   /*no such pid anymore */
			if (trace && logging){
			 fprintf(lf,"texit node %d clock %d\n",
			  i == MAXNODES ? HOST : i,ticks);
	  		 fprintf(lf,"cnt\t%d\tclock\t%ld\n",running,ticks);
			}
			node[i].nt_state = UNUSED;
	  }
	  if (!running)finish();
	  n=mget(m);		/* get a message request */
	  if (n==0)return;  /* no processes */
	  req = m->mh_req;
	  if (debug>99) fprintf(stderr,"mpsim read  req %d\n",req);
	  if ( m->mh_magic != MAGIC){
		printf("mpsim read failed  bad MAGIC\n");
		finish();
	  }
	  nd=m->mh_me;  /* node, index into nt */
	  if (nd==HOST) nd = MAXNODES;
	  tfnd=m->mh_node;  /* to/from node */
	  if (tfnd==HOST) tfnd = MAXNODES;
	  rcving = 0;

	  switch(req){		/*  handle request */
	   case DIM:		/* return dim and mynode */
		for(i=0;i<=MAXNODES;i++) if(node[i].nt_upid==m->mh_upid)n=i;
		if (n==MAXNODES) n= HOST;
		*ip = CubDim;
		*(ip+1) = node[nd].nt_mypid;
		m->mh_me = n;
		m->mh_lth = 8;
		if (trace && logging)
		 fprintf(lf,"start clock %ld upid %d running %d\n",
		  ticks,m->mh_upid,m->mh_me);
		msend(m->mh_upid,m);
		break;
	  case RECV:
	  case RECVW:
	  case MSGWAIT:
		if (trace && logging){
		 rcving = 1;
		 fprintf(lf,"%s clock %ld node %d pid %d type %d lth %d",
		 reqnm[req],ticks,m->mh_me,m->mh_pid,m->mh_type,m->mh_rcvlth);
		}
	  case STAT:
	  case STATW:
		if ((mp=srchq(m,nd))==NULL) { 
			/*nothing in my q matching*/
			if (req == RECV || req == STAT) {
				/* no wait send back his request */
				if(rcving)fprintf(lf,"\n");
				msend(node[nd].nt_upid,m);
				break;
			}
			 /* else recvw and statw must wait . . . */
			node[nd].nt_state = WAIT;
			running--;
			node[nd].nt_type = m->mh_type;
			node[nd].nt_pid = m->mh_pid;
			if (rcving){
			 fprintf(lf," blocking %d\n",m->mh_me);
	  		 fprintf(lf,"cnt\t%d\tclock\t%ld\n",running,ticks);
			}
			break;
		}
		 /* else have a message, send it all */
		if (rcving)
		  fprintf(lf," from %d\n",mp->mh_me);
		msend(node[nd].nt_upid,mp);
		prev->mh_next = mp->mh_next;  /* de q */
		free(mp);
		break;

	  case PROBE:
	  case PROBEMSG:
		/* see if message in q return lth or -1 in rcvlth */
		if ((mp=srchq(m,nd))== NULL) m->mh_rcvlth = -1;  /*nope */
			else {  /* got one here, send back synopsis */
                        m->mh_rcvlth = mp->mh_lth;
                        m->mh_type = mp->mh_type;
                        m->mh_me = mp->mh_me;
                        m->mh_mepid = mp->mh_mepid;
		}
		msend(node[nd].nt_upid,m);
		break;
	  
	  case SEND:
		if (trace && logging)
		 fprintf(lf,"send clock %ld node %d fpid %d to %d pid %d  type %d lth %d",
		ticks,m->mh_me,m->mh_mepid,m->mh_node,
		 m->mh_pid,m->mh_type,m->mh_lth);
		/* see if this is a global send */
		waking = gsend =0;
		if (tfnd < 0){
			/* global send */
			gsend =1;
			d = tfnd + CubDim + 1; /* dim of send */
			if (d <= 0) {  /* no send at all !! */
				if (trace && logging)fprintf(lf," gsend NIL\n");
				break;
			}
			n = 1<<d;  /* number of nodes to send */
			if (m->mh_me == HOST) tfnd =0;
			  else tfnd = (m->mh_me >> d) << d;
			if (trace && logging){
				fprintf(lf," gsend ");
				for (i=0,j=tfnd; i< n;i++,j++) if (j!=m->mh_me)
				 fprintf(lf,"%d,",j);
			}
		} else n = 1;    /*regular single node send */
		while(n--){  /* send to n nodes */
		  if (gsend && m->mh_me == tfnd) {
			tfnd++;
			continue; /*not to myself */
		  }
			/* see if node is waiting for this msg */
		  if (node[tfnd].nt_state==WAIT && 
		   node[tfnd].nt_pid == m->mh_pid &&
		   (node[tfnd].nt_type==m->mh_type ||
		   node[tfnd].nt_type==ANY)){
		       /* yes, so send it to him */
		     msend(node[tfnd].nt_upid,m);
		     node[tfnd].nt_state=BUSY;
		     running++;
		     if (waking==0 && trace && logging) fprintf(lf," waking ");
		     waking++;
		     if (trace && logging)
		      fprintf(lf,"%d ",m->mh_node < 0 ? tfnd : m->mh_node);
		 } else qend(m,tfnd); /* else no one waiting  q it */
		 tfnd++;
		} /* while n nodes */
		if (waking && trace && logging)
	  	    fprintf(lf,"\ncnt\t%d\tclock\t%ld",running,ticks);
		if (trace && logging) fprintf(lf,"\n");
		break;

	  case SYSLOG:
		if (logging)
		 fprintf(lf,"syslog clock %ld node %d id %d msg %s\n",
		  ticks,m->mh_me,m->mh_mepid,m->mh_data);
		break;
	  default:
		printf("mpsim unknown req %d\n",m->mh_req);
		break;
	}
	}
}

mget(mp)
struct MsgHdr *mp;
{
	/* get a request from children */
	int upid, n;

	n=rdpipe(REQ,&upid,sizeof(int));  /* fetch a req upid */
	 if(n==0) return(0);  /* no pipe means no children */
	n=kill(upid,SIGTERM);  /* let child write on MSG */
	if (n== -1){
		fprintf(stderr," pipe scrambled %d \n",upid);
		exit(1);
	}
	rdpipe(MSGIN,mp,HDRLTH);
	 if (mp->mh_magic != MAGIC){
		fprintf(stderr,"mpsim bad MAGIC\n");
		finish();
	}
	rdpipe(MSGIN,mp->mh_data,mp->mh_lth);
	return(1);
}

msend(upid,mp)
int upid;
struct MsgHdr *mp;
{
	/* send message  down the pipe -- only one guy should be reading */
	int lth;

	lth=HDRLTH + mp->mh_lth;
	 if(debug>99)
	   fprintf(stderr,"msend upid %d lth %d\n",upid,lth);
	kill(upid,SIGTERM);  /* release his pause */
	write(MSGOUT,mp,lth);		/* down the pipe */
	rdpipe(ACK,&lth,sizeof(int));  /* wait til he gets it */
}

qend(mp,nd)
struct MsgHdr *mp;
int nd;  /* node index */
{
	/* allocate an entry and add to end of q */
	struct MsgHdr *q, *p, *prev;
	int lth;

	if (debug>100){fprintf(stderr,"qend node %d\n",nd);fflush(stderr);}
	lth = HDRLTH + mp->mh_lth;
	p = malloc(lth);
	  if(p==NULL){
		fprintf(stderr,"mpsim malloc failed %d\n",lth);
		finish();
	}
	bcopy(mp,p,lth);
	p->mh_next= NULL;
	prev = (struct MsgHdr *) &node[nd];
	q = node[nd].nt_recvq;
	while(q!=NULL){  /* find end */
		prev=q;
		q= q->mh_next;
	}
	prev->mh_next = p;
}

rdpipe(pipe,buf,lth)
int pipe;
char *buf;
int lth;
{
	/* read lth bytes into buf from the pipe pipe
	 *  since pipes may fill sporadically we persist
	 */
	int n;

	 if(debug>99)
	  fprintf(stderr,"mpsim rdpipe %d lth\n",pipe,lth);
	while(lth){
		n=read(pipe,buf,lth);
		 if(n==0) return(0);  /* other end gone */
		buf += n;   /* advance -- use arg since pass by val */
		lth -= n;
	}
	return(1);  /* ok */
}

#ifndef BCOPY
bcopy(src,dst,n)
char *dst, *src;
int n;
{
	while(n--) *dst++ = *src++;
}
#endif

struct MsgHdr *
srchq(mp,nd)
struct MsgHdr *mp;
int nd;		/* node of dest */
{
	/* see if a message is on  nd's q matching his recv request */
	/* NOTE  SIDE EFFECT    prev (global) is set for later use */
	struct MsgHdr *q;

	if (debug>100){fprintf(stderr,"srchq node %d\n",nd);fflush(stderr);}
	prev= (struct MsgHdr *) &node[nd];
	q = node[nd].nt_recvq;
	while(q!=NULL){
		if ((mp->mh_pid == ANY || mp->mh_pid == q->mh_pid) &&
		    (mp->mh_type == ANY || mp->mh_type == q->mh_type))
		    return(q);  /* found it */
		prev = q;
		q = q->mh_next;
	}
	return(NULL);  /* not there */
}
SHAR_EOF
if test -f 'ftnsubs.c'
then
	echo shar: over-writing existing file "'ftnsubs.c'"
fi
cat << \SHAR_EOF > 'ftnsubs.c'
/* ftnsubs.c
 *	f77 interfaces for oak ridge stuff -- snd rcv hep
 *	make fortran names and change pass by reference to pass by value
 */

int
copen_(pidptr)
int *pidptr;
{
	return(copen(*pidptr));
}

void
cclose_(dptr)
int *dptr;
{
	cclose(*dptr);
}

void
sendw_(dptr,typptr,buff,lthptr,nodeptr,pidptr)
int *dptr;
char *buff;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	sendw(*dptr,*typptr,buff,*lthptr,*nodeptr,*pidptr);
}

void
sendmsg_(dptr,typptr,buff,lthptr,nodeptr,pidptr)
int *dptr;
char *buff;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	sendmsg(*dptr,*typptr,buff,*lthptr,*nodeptr,*pidptr);
}

void
recv_(dptr,typptr,buff,maxptr, lthptr,nodeptr,pidptr)
int *dptr;
char *buff;
int *maxptr;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	recv(*dptr,*typptr,buff,*maxptr,lthptr,nodeptr,pidptr);
}
void
send_(dptr,typptr,buff,lthptr,nodeptr,pidptr)
int *dptr;
char *buff;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	send(*dptr,*typptr,buff,*lthptr,*nodeptr,*pidptr);
}

void
recvw_(dptr,typptr,buff,maxptr, lthptr,nodeptr,pidptr)
int *dptr;
char *buff;
int *maxptr;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	recvw(*dptr,*typptr,buff,*maxptr,lthptr,nodeptr,pidptr);
}

void
recvmsg_(dptr,typptr,buff,maxptr, lthptr,nodeptr,pidptr)
int *dptr;
char *buff;
int *maxptr;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	recvmsg(*dptr,typptr,buff,*maxptr,lthptr,nodeptr,pidptr);
}

int
probe_(dptr,typptr)
int *dptr;
int *typptr;
{
	return(probe(*dptr,*typptr));
}

int
probemsg_(pidptr)
int *pidptr;
{
	return(probemsg(*pidptr));
}

int 
status_(dptr)
int *dptr;
{
	return(status(*dptr));
}

int
mynode_()
{
	return(mynode());
}

int
mypid_()
{
	return(mypid());
}

int
cubedim_()
{
	return(cubedim());
}

int
clock_()
{
	return(clock());
}

void
flick_()
{
	flick();
}

void
syslog_(pid,msg)
int *pid;
char *msg;
{
	syslog(*pid,msg);
}

/* new routines for INTEL  */

void
csend_(typptr,buff,lthptr,nodeptr,pidptr)
char *buff;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	csend(*typptr,buff,*lthptr,*nodeptr,*pidptr);
}

void
crecv_(typptr,buff,lthptr)
char *buff;
int *typptr, *lthptr;
{
	crecv(*typptr,buff,*lthptr);
}

int
irecv_(typptr,buff,lthptr)
char *buff;
int *typptr, *lthptr;
{
	return(irecv(*typptr,buff,*lthptr));
}

int
isend_(typptr,buff,lthptr,nodeptr,pidptr)
char *buff;
int *typptr, *lthptr, *nodeptr, *pidptr;
{
	return(isend(*typptr,buff,*lthptr,*nodeptr,*pidptr));
}

int
myhost_()
{
	return(myhost());
}

int
nodedim_()
{
	return(nodedim());
}

int
numnodes_()
{
	return(numnodes());
}

int
mclock_()
{
	return(mclock());
}

int
infonode_()
{
	return(infonode());
}

int
infopid_()
{
	return(infopid());
}

int
infotype_()
{
	return(infotype());
}

int
infocount_()
{
	return(infocount());
}

void
setpid_(pidptr)
int *pidptr;
{
	setpid(*pidptr);
}

int
iprobe_(typeptr)
int *typeptr;
{
	return(iprobe(*typeptr));
}

int
msgwait_(idptr)
int *idptr;
{
	return(msgwait(*idptr));
}

int
msgdone_(idptr)
int *idptr;
{
	return(msgdone(*idptr));
}
SHAR_EOF
if test -f 'mpsim.c'
then
	echo shar: over-writing existing file "'mpsim.c'"
fi
cat << \SHAR_EOF > 'mpsim.c'
/* mpsim.c
 *   intel iPSC hypercube simulator using UNIX forks and pipes
 *    XENIX, 4.2bsd, and sequent
 *    Sys V, Release 2.0.
 *	T. Dunigan   ORNL 12/21/85  8/2/88
 *		uses 3 pipes  (REQ, MSGIN/OUT, ACK)
 *		synchronized with signal/pause
 *		Assumes 2-byte write on a pipe (REQ) is atomic
 */

#include <stdio.h>
#ifdef SYSV
#include <fcntl.h>
#endif
#include <signal.h>
#include "sim.h"
#include "simlib.h"

char *reqnm[] ={ "","dim","send","recv","recvw","probe","stat",
		 "statw","syslog","msgwait","cprobe"
		};

int finish(), child();

main(argc,argv)
int argc;
char *argv[];
{
	int n;

	signal(SIGINT,finish);
	debug=0;
	if (argc>1)debug= atoi(argv[1]);
	init();
	userin();
	start();
	drive();
	finish();
}

init()
{
	/* set up things */
	int i;
	int fds[2];

	  /* set up pipes for us and children */
	pipe(fds);
#ifdef SYSV
	close(MSGIN);
	close(MSGOUT);
	fcntl(fds[0], F_DUPFD, MSGIN);
	fcntl(fds[1], F_DUPFD, MSGOUT);
#else
	dup2(fds[0],MSGIN);	/* rd wrt by both host and child */
	dup2(fds[1],MSGOUT);
#endif
	close(fds[0]);  close(fds[1]);
	pipe(fds);
#ifdef SYSV
	close(ACK);
	close(ACKOUT);
	fcntl(fds[0], F_DUPFD, ACK);
	fcntl(fds[1], F_DUPFD, ACKOUT);
#else
	dup2(fds[0],ACK);  /* rd by sim, write by child */
	dup2(fds[1],ACKOUT);  /* will be close later */
#endif
	close(fds[0]);  close(fds[1]);
	pipe(fds);
#ifdef SYSV
	close(REQ);
	close(REQOUT);
	fcntl(fds[0], F_DUPFD, REQ);
	fcntl(fds[1], F_DUPFD, REQOUT);
#else
	dup2(fds[0],REQ);	/* write by all children rd by host */
	dup2(fds[1],REQOUT);	/*  will be closed later */
#endif
	close(fds[0]);  close(fds[1]);
	strcpy(logfile,LOGNM);
	trace = 0;
	for(i=0;i <= MAXNODES;i++){
		*node[i].nt_filnam = 0;
		node[i].nt_state = UNUSED;
		node[i].nt_recvq = NULL;
	}
}

start()
{
	/* step thru fork tbl and start subprocesses */
	int i;

	lf = NULL;
	running=0;
	for(i = MAXNODES;i >= 0;i--){
		if (node[i].nt_filnam[0] != 0){
		 running++;
		 node[i].nt_upid=spawn(node[i].nt_filnam);
		 node[i].nt_state = BUSY;
		}
	}
	  /* children running so . . . */
	/* get rid of ends of pipes we don't use */
	close(ACKOUT);   close(REQOUT);
	/* open logfile */
	if (logging){
		if (strcmp(logfile,"stdout")==0) lf = stdout;
		else lf = fopen(logfile,"w");
	}
	if (lf == NULL) logging=0;  /* open failed? */
}

spawn(filename)
char *filename;
{
	/* start a child process */
	int pid;

	if ((pid=fork())==0){
		/* child */
		  /* set up child pipes to use write ends of ACK and REQ */
#ifdef SYSV
		close(ACK);
		fcntl(ACKOUT, F_DUPFD, ACK);
#else
		dup2(ACKOUT,ACK);
#endif
		close(ACKOUT);
#ifdef SYSV
		close(REQ);
		fcntl(REQOUT, F_DUPFD, REQ);
#else
		dup2(REQOUT,REQ);
#endif
		close(REQOUT);
		execl(filename,filename,0);
		printf(" mpsim unable to start %s\n",filename);
		exit(1);
	}
	 if (debug>50)
	 fprintf(stderr,"spawn %s upid %d\n",filename,pid);
	if (pid < 0){
		fprintf(stderr,"mpsim too many forks\n");
		finish();
	}
	return(pid);
}

log()
{
}

finish()
{
	/* catch interrupts and clean up */
	int i;

	printf(" mpsim exiting\n");
	if (lf == NULL) logging=0;  /*catch early exit */
	if (trace & logging)
	 fprintf(lf,"cnt\t%d\tclock\t%ld\n",0,clock()-sttime);
	if (logging)fclose(lf);
	for(i=0;i <= MAXNODES;i++)
		if (node[i].nt_state != UNUSED) kill(node[i].nt_upid,SIGKILL);
	if (debug==1)dump();
	exit(0);
}

dump()
{
	/* dump sim tables */
	int i,j;
	struct MsgHdr *mp;

	printf(" %d running\n",running);
	for (i=0;i <= MAXNODES;i++){
		if (node[i].nt_filnam[0]==0)continue;
		printf("\nnode %d  upid %d state %d file %s",i,
		 node[i].nt_upid,node[i].nt_state,node[i].nt_filnam);
		printf(" type %d pid %d\n",node[i].nt_type,node[i].nt_pid);
		printf(" recvq %lx  bldptr %lx\n",node[i].nt_recvq,
		 node[i].nt_bldptr);
		mp = node[i].nt_recvq;
		while(mp!=NULL){
		  printf("\tmagic %d req %d upid %d me %d mepid %d\n",
		   mp->mh_magic,mp->mh_req,mp->mh_upid,mp->mh_me,
		   mp->mh_mepid);
		  printf("\t  node %d pid %d type %d\n",
		   mp->mh_node,mp->mh_pid,mp->mh_type);
		  printf("\t  nextq %lx lth %d ",mp->mh_next,mp->mh_lth);
		  for(j=0;j<10;j++)printf(" %x",mp->mh_data[j]);
		  printf("\n\n");
		  mp = mp->mh_next;
		}
		printf("\n");
	}
}

#include <sys/types.h>
#ifdef SYSV
#include <sys/times.h>
#include <sys/param.h>
#else
#include <sys/timeb.h>
#endif

long
clock()
{
	/* return milliseconds of wall clock time */
#ifdef SYSV
	struct tms t;
#else
	struct timeb t;
#endif
	long v;

#ifdef SYSV
	long times();
	v = 1000 * times(&t)/ HZ;
#else
	ftime(&t);
	v = 1000*t.time + t.millitm;
#endif
	return(v);
}
SHAR_EOF
if test -f 'simlib.c'
then
	echo shar: over-writing existing file "'simlib.c'"
fi
cat << \SHAR_EOF > 'simlib.c'
/* simlib.c
 *   simulator library routines
 *	T. Dunigan  ORNL  1/18/86  8/2/88
 *   keep names private (static)
 *	uses 4 file descriptors and a signal
 */

#include <stdio.h>
#include <signal.h>
#include "simlib.h"

#ifdef SYSV
#define PAUSE
#endif

#ifdef XENIX
#define PAUSE
#endif

#define STARTED if (CubDim == -1) init();
#define CHKCI chkci(ci);
#define CHKTYPE ;
#define CHKPID ;
#define CHKLTH chklth(lth);
#define CHKNODE chknode(node);

static struct MsgHdr *mp;
struct MsgHdr *malloc();

#define MAXCI 8
static struct CiTable {
	int ct_pid;		/* user pid */
	int ct_stat;		/* ci stat: busy free unused */
	int ct_req;		/* current request */
	int ct_maxlth;
	int ct_type;
	int ct_stype, ct_node, ct_spid, ct_lth;
	int *ct_lthp;		/* return value ponters */
	char *ct_bufp;
	int *ct_nodep;
	int *ct_pidp;
} ct[MAXCI];
static int *ip;
static catch(),init(),getreq(),sndreq(),rdpipe(),busyci();
static chklth(),chknode(),chkci(),bcopy();
static int CubDim = -1;
static int upid, maxnode, proceed, me, mepid;
static int info_count, info_node, info_pid, info_type; /*last recv info */

copen(pid)
int pid;
{
	int i;

	STARTED
	CHKPID
	for(i=0;i<MAXCI;i++)
	  if (ct[i].ct_stat == UNUSED){  /* got one */
		ct[i].ct_stat = FREE;
		ct[i].ct_pid = pid;
		return(i);
	  }
	fprintf(stderr,"node %d no more channels\n",me);
	exit(1);
}

cclose(ci)
int ci;
{
	 STARTED
	 CHKCI
	 ct[ci].ct_stat = UNUSED;
}

sendw(ci,type,buf,lth,node,pid)
int ci,type,lth,node,pid;
char *buf;
{
	 /*  send a message to another node */

	 STARTED
	 CHKCI
	 busyci(ci);
	 CHKTYPE
	 CHKPID
	 CHKLTH
	if (node != HOST && node >= maxnode){
		fprintf(stderr,"node %d bad dest node %d\n",me,node);
		exit(1);
	}
	 mp->mh_mepid = ct[ci].ct_pid;
	 mp->mh_node = node; /* dest node */
	 mp->mh_pid = pid;  /* dest pid */
	 mp->mh_lth =lth;
	 mp->mh_type = type;
	bcopy(buf,mp->mh_data,lth);
	sndreq(SEND);
}

send(ci,type,buf,lth,node,pid)
int ci,type,lth,node,pid;
char *buf;
{
	sendw(ci,type,buf,lth,node,pid);
}

sendmsg(ci,type,buf,lth,node,pid)
int ci,type,lth,node,pid;
char *buf;
{
	sendw(ci,type,buf,lth,node,pid);
}

recvw(ci,type,buf,maxlth,lth,node,pid)
int ci,type,maxlth,*lth,*node,*pid;
char *buf;
{
	/* blocking recv */
	int l;

	STARTED
	CHKCI
	busyci(ci);
	CHKTYPE
	mp->mh_pid = ct[ci].ct_pid;  /* has to come to my pid */
	mp->mh_type = type;
	mp->mh_lth = 0;
	mp->mh_rcvlth = maxlth;  /*for trace */
	getreq(RECVW);		/* request message and wait */
	l = mp->mh_lth;   /* message length */
	bcopy(mp->mh_data,buf, l<maxlth?l:maxlth);
	*lth = l;
	*node = mp->mh_me;
	*pid = mp->mh_mepid;
}

recv(ci,type,buf,maxlth,lth,node,pid)
int ci,type,maxlth,*lth,*node,*pid;
char *buf;
{
	/* non-blocking recv -- see if a mesg is there otherwise
	 *     save args so  stat can ask again
	 */
	int l;

	STARTED
	CHKCI
	busyci(ci);
	CHKTYPE
	mp->mh_pid = ct[ci].ct_pid;  /* has to come to my pid */
	mp->mh_type = type;
	mp->mh_lth = 0;
	mp->mh_rcvlth = maxlth;  /*for trace */
	getreq(RECV);		/* request message and wait */
	  /* either got msg req==SEND or our own msg back*/
	if (mp->mh_req != SEND){
		/* no msg yet  -- save args for later use by stat */
		ct[ci].ct_type = type;
		ct[ci].ct_maxlth = maxlth;
		ct[ci].ct_lthp = lth;   /* save pointers */
		ct[ci].ct_bufp = buf;
		ct[ci].ct_nodep = node;
		ct[ci].ct_pidp =pid;
		ct[ci].ct_stat = BUSY;   /* show ci in use */
		return;
	}
	  /* else have a msg -- copy to caller */
	l = mp->mh_lth;   /* message length */
	bcopy(mp->mh_data,buf, l<maxlth?l:maxlth);
	*lth = l;
	*node = mp->mh_me;
	*pid = mp->mh_mepid;
	ct[ci].ct_stype = mp->mh_type;
	ct[ci].ct_stat = FREE;
}

recvmsg(ci,type,buf,maxlth,lth,node,pid)
int ci,*type,maxlth,*lth,*node,*pid;
char *buf;
{
	/* blocking recv */
	int l;

	STARTED
	CHKCI
	busyci(ci);
	mp->mh_pid = ct[ci].ct_pid;  /* has to come to my pid */
	mp->mh_type = ANY;
	mp->mh_rcvlth = maxlth;  /* for trace info */
	mp->mh_lth = 0;
	getreq(RECVW);		/* request message and wait */
	l = mp->mh_lth;   /* message length */
	bcopy(mp->mh_data,buf, l<maxlth?l:maxlth);
	*lth = l;
	*node = mp->mh_me;
	*pid = mp->mh_mepid;
	*type = mp->mh_type;
}

probe(ci,type)
int ci;
int type;
{
	/* see if a message is waiting, return lth if yes, else -1 */
	STARTED
	CHKCI
	CHKTYPE
	mp->mh_pid = ct[ci].ct_pid;  /* has to come to my pid */
	mp->mh_type = type;
	mp->mh_lth = 0;
	mp->mh_rcvlth = 0;
	getreq(PROBE);		/* request message and wait */
	return(mp->mh_rcvlth);
}

probemsg(pid)
int pid;
{
	/* see if a message is waiting, return  1 if yes, else 0 */
	STARTED
	mp->mh_pid = pid;
	mp->mh_type = ANY;
	mp->mh_lth = 0;
	mp->mh_rcvlth = 0;
	getreq(PROBEMSG);		/* request message and wait */
	return(mp->mh_rcvlth== -1 ? 0 : 1);
}


status(ci)
int ci;
{
	int l;

	STARTED
	CHKCI
	if (ct[ci].ct_stat == FREE) return(FREE);
	  /* otherwise recv outstanding, ask sim about status*/
	  /*get poop from ci tbl */
	mp->mh_pid = ct[ci].ct_pid;  /* has to come to my pid */
	mp->mh_type = ct[ci].ct_type;
	mp->mh_lth = 0;
	mp->mh_rcvlth = ct[ci].ct_maxlth;
	getreq(STAT);  /*will get back no or the message */
	if (mp->mh_req != SEND) return(BUSY);   /* still nothing */
	  /* else have a msg -- copy to caller */
	l = mp->mh_lth;   /* message length */
	bcopy(mp->mh_data,ct[ci].ct_bufp,l<ct[ci].ct_maxlth?l:ct[ci].ct_maxlth);
	*ct[ci].ct_lthp = l;
	*ct[ci].ct_nodep = mp->mh_me;
	*ct[ci].ct_pidp = mp->mh_mepid;
	ct[ci].ct_stat = FREE;
	return(FREE);
}

syslog(n,s)
int n;
char *s;
{
	/* log string */
	STARTED
	mp->mh_mepid = n;
	mp->mh_lth = strlen(s) + 1;
	strcpy(mp->mh_data,s);
	sndreq(SYSLOG);
}

#include <sys/types.h>
#ifdef SYSV
#include <sys/times.h>
#include <sys/param.h>
#else
#include <sys/timeb.h>
#endif

long
clock()
{
	/* return milliseconds of wall clock time */
#ifdef SYSV
	struct tms t;
#else
	struct timeb t;
#endif
	long v;

#ifdef SYSV
	long times();
	v = 1000 * times(&t)/ HZ;
#else
	ftime(&t);
	v = 1000*t.time + t.millitm;
#endif
	return(v);
}

flick(){}

cubedim()
{
	STARTED
	return(CubDim);
}

mynode()
{
	STARTED
	return(me);
}

mypid()
{
	return(0);  /* for now */
}

static
init()
{
	/* set up first time */
	int i;

	signal(SIGTERM,catch);
	upid=getpid();
	proceed=0;
	mp = malloc(sizeof(struct MsgHdr)+MAXLTH);
	 if (mp==0){
		fprintf(stderr," malloc in sim upid %d\n",upid);
		exit(1);
	}
	ip = (int *)mp->mh_data;  /* int overlay */
	mp->mh_magic = MAGIC;
	mp->mh_upid=upid;
	mp->mh_lth = 0;
	getreq(DIM);  /* fetch dim  and mynode*/
	CubDim = *ip++;
	mepid = *ip;
	me = mp->mh_me;
	maxnode = 1<<CubDim;
	for(i=0;i<MAXCI;i++) ct[i].ct_stat = UNUSED;
}

static 
getreq(req)
int req;
{
	/* send req and await reply */
	struct MsgHdr *p;
	int n,lth;

	proceed=0;		/* sim will release pause with signal */
	sndreq(req);		/* ask sim for info  */
#ifdef PAUSE
	if(!proceed) pause();  /* let sim tell us when to go */
#else
	if(!proceed) sigpause(0);  /* let sim tell us when to go */
#endif
	proceed = 0;
	rdpipe(MSGIN,mp,HDRLTH);  /*get header */
	 if (mp->mh_magic != MAGIC){
		fprintf(stderr,"node %d upid %d bad MAGIC\n",me,upid);
		exit(1);
	 }
	rdpipe(MSGIN,mp->mh_data,mp->mh_lth);
	write(ACK,&n,sizeof(int));  /* tell sim we got poop */
}

static
sndreq(req)
int req;
{
	/* send a request to sim */

	proceed=0;
	write(REQ,&upid,sizeof(int));	/* make a request application */
#ifdef PAUSE
	if(!proceed) pause();  /* let sim tell us when to go */
#else
	if(!proceed) sigpause(0);  /* let sim tell us when to go */
#endif
	proceed=0;
	mp->mh_req = req;
	mp->mh_me = me;
	write(MSGOUT,mp,HDRLTH + mp->mh_lth); /* up the pipe */
}

static
rdpipe(pipe,buf,lth)
int pipe;
char *buf;
int lth;
{
	/* read lth bytes into buf from the pipe pipe
	 *  since pipes may fill sporadically we persist
	 */
	int n;

	while(lth){
		n=read(pipe,buf,lth);
		 if(n==0) return(0);  /* other end gone */
		buf += n;   /* advance -- use arg since pass by val */
		lth -= n;
	}
	return(1);  /* ok */
}

static
busyci(ci)
int ci;
{
	int l;
	/* if ci is busy with a recv then we must wait */

	if (ct[ci].ct_stat == FREE) return;
	/* else make us a STATW request */
	mp->mh_pid = ct[ci].ct_pid;  /* has to come to my pid */
	mp->mh_type = ct[ci].ct_type;
	mp->mh_lth = 0;
	mp->mh_rcvlth = ct[ci].ct_maxlth;
	getreq(STATW);  /*will wait for msg now ! */
	  /* have a msg -- copy to caller */
	l = mp->mh_lth;   /* message length */
	bcopy(mp->mh_data,ct[ci].ct_bufp,l<ct[ci].ct_maxlth?l:ct[ci].ct_maxlth);
	*ct[ci].ct_lthp = l;
	*ct[ci].ct_nodep = mp->mh_me;
	*ct[ci].ct_pidp = mp->mh_mepid;
	ct[ci].ct_stat = FREE;
}

static
chklth(lth)
int lth;
{
	if (lth >= 0 && lth <= MAXLTH ) return;
	fprintf(stderr,"node %d bad length %d\n",me,lth);
	exit(1);
}

static
chknode(node)
int node;
{
	if (node == HOST ||(node >= 0 && node < maxnode))return;
	fprintf(stderr,"node %d bad dest node %d\n",me,node);
	exit(1);
}

static
chkci(ci)
int ci;
{
	if (ci >= 0 && ci < MAXCI && ct[ci].ct_stat != UNUSED)return;
	fprintf(stderr,"node %d bad channel %d\n",me,ci);
	exit(1);
}

static
catch()
{
	/* catch signal from sim */
	proceed = 1;
	signal(SIGTERM,catch);
}

#ifndef BCOPY
static
bcopy(src,dst,n)
char *dst, *src;
int n;
{
	while(n--) *dst++ = *src++;
}
#endif

/*  v2 stuff    8/2/88  */

infocount()
{
	STARTED
	return(info_count);
}

infonode()
{
	STARTED
	return(info_node);
}

infopid()
{
	STARTED
	return(info_pid);
}

infotype()
{
	STARTED
	return(info_type);
}

mclock()
{
	return(clock());
}

myhost()
{
	STARTED
	return(HOST);
}

nodedim()
{
	STARTED
	return(CubDim);
}

numnodes()
{
	STARTED
	return(maxnode);
}

setpid(pid)
int pid;
{
	STARTED
	mepid = pid;
}

crecv(type,buf,maxlth)
int type,maxlth;
char *buf;
{
	int l;

	STARTED
	mp->mh_pid = mepid;
	mp->mh_type = type;
	mp->mh_lth = 0;
	mp->mh_rcvlth = maxlth;  /*for trace */
	getreq(RECVW);		/* request message and wait */
	l = mp->mh_lth;   /* message length */
	bcopy(mp->mh_data,buf, l<maxlth?l:maxlth);
	info_count = l;
	info_node = mp->mh_me;
	info_pid = mp->mh_mepid;
	info_type = mp->mh_type;
	if (l > maxlth) fprintf(stderr,
	 "Warning recvd lth %d exceeds buffer %d on node %d\n",l,maxlth,me);
}

csend(type,buf,lth,node,pid)
int type,lth,node,pid;
char *buf;
{
	 STARTED
	 CHKTYPE
	 CHKPID
	 CHKLTH
	 CHKNODE
	 mp->mh_mepid = mepid;
	 mp->mh_node = node; /* dest node */
	 mp->mh_pid = pid;  /* dest pid */
	 mp->mh_lth =lth;
	 mp->mh_type = type;
	bcopy(buf,mp->mh_data,lth);
	sndreq(SEND);
}

#define ISNDID -1234
int
isend(type,buf,lth,node,pid)
int type,lth,node,pid;
char *buf;
{
	csend(type,buf,lth,node,pid);
	return( ISNDID );  /*dummy id -- e.g. buffer ready for resuse */
}

int
irecv(type,buf,maxlth)
int type,maxlth;
char *buf;
{
	int ci;

	ci = copen(mepid);		/* id for msgwait msgdone */
	recv(ci,type,buf,maxlth,&ct[ci].ct_lth,&ct[ci].ct_node,&ct[ci].ct_spid);
	return(ci);
}

cprobe(type)
int type;
{
	STARTED
	/* worry about msg sitting in  ct table from a irecv ? */
	/*  race condition???? */
  fprintf(stderr,"cprobe not implemented yet\n"); exit(1);
	mp->mh_pid = mepid;
	mp->mh_type = type;
	mp->mh_lth = 0;
	getreq(CPROBE);
	info_count = mp->mh_lth;   /* message length */
	info_node = mp->mh_me;
	info_pid = mp->mh_mepid;
	info_type = mp->mh_type;
	  /* note, mesg is still in our q, recv must be issued to get it */
}

int
iprobe(type)
int type;
{
	STARTED
	/* worry about msg sitting in  ct table from a irecv ? */
	mp->mh_pid = mepid;
	mp->mh_type = type;
	mp->mh_lth = 0;
	getreq(PROBE);
	if (mp->mh_rcvlth == -1) return(FALSE);  /* none there yet */
	/* else a msg is there */
	info_count = mp->mh_lth;   /* message length */
	info_node = mp->mh_me;
	info_pid = mp->mh_mepid;
	info_type = mp->mh_type;
	return(TRUE);
}

msgwait(ci)
int ci;
{
	int l,maxlth;

	if (ci == ISNDID) return(1); /*isend done */
	STARTED
	CHKCI
	if (ct[ci].ct_stat == FREE){
		/* recv previously completed */
		info_count = ct[ci].ct_lth;
		info_type = ct[ci].ct_stype;
		info_pid = ct[ci].ct_spid;
		info_node = ct[ci].ct_node;
	}  else {  /* request and wait */
		mp->mh_pid = mepid;
		mp->mh_type = ct[ci].ct_type;
		mp->mh_lth = 0;
		mp->mh_rcvlth = maxlth = ct[ci].ct_maxlth;  /*for trace */
		getreq(MSGWAIT);
		l = mp->mh_lth;   /* message length */
		bcopy(mp->mh_data,ct[ci].ct_bufp, l<maxlth?l:maxlth);
		info_count = l;
		info_node = mp->mh_me;
		info_pid = mp->mh_mepid;
		info_type = mp->mh_type;
	}
	if (info_count > ct[ci].ct_maxlth) fprintf(stderr,
	 "Warning recvd lth %d exceeds buffer %d on node %d\n",info_count,
	  ct[ci].ct_maxlth,me);
	cclose(ci);    /* release id */
}

int
msgdone(ci)
int ci;
{
	if (ci == ISNDID) return(1); /*isend done */
	STARTED
	CHKCI
	if (ct[ci].ct_stat == FREE){
		/* recv previously completed */
		info_count = ct[ci].ct_lth;
		info_type = ct[ci].ct_stype;
		info_pid = ct[ci].ct_spid;
		info_node = ct[ci].ct_node;
	}  else {  /* check with sim */
		mp->mh_pid = mepid;
		mp->mh_type = ct[ci].ct_type;
		mp->mh_lth = 0;
		mp->mh_rcvlth = ct[ci].ct_maxlth;  /*for trace */
		getreq(STAT);
		if (mp->mh_req != SEND) return(FALSE);  /* nothing yet --> */
		 /* else have the msg now */
		info_count = mp->mh_lth;   /* message length */
		bcopy(mp->mh_data,ct[ci].ct_bufp,info_count < ct[ci].ct_maxlth ?
		 info_count : ct[ci].ct_maxlth);
		info_node = mp->mh_me;
		info_pid = mp->mh_mepid;
		info_type = mp->mh_type;
	}
	if (info_count > ct[ci].ct_maxlth) fprintf(stderr,
	 "Warning recvd lth %d exceeds buffer %d on node %d\n",info_count,
	  ct[ci].ct_maxlth,me);
	cclose(ci);    /* release id */
	return(TRUE);
}
SHAR_EOF
if test -f 'userin.c'
then
	echo shar: over-writing existing file "'userin.c'"
fi
cat << \SHAR_EOF > 'userin.c'
/* userin.c
 *  get user input 
 *	T. Dunigan   ORNL    1/22/86 8/2/88
 */
#include <stdio.h>
#include <ctype.h>
#include "sim.h"

userin()
{
	int i,n,using,inpid;
	char line[200];
	char *p, *nxtword(), *getword();
	FILE *in;

	using=0;
	in = stdin;  /*default command input */
	trace=0;
	logging=0;
	printf("  mpsim v2.0 8/2/88\n");
	printf("mpsim> ");
	while(1){
                if (fgets(line,sizeof(line),in)==NULL){
                        /* end of file on input -- switch to stdin or quit */
                        if (using) {
                                fclose(in);
                                in = stdin;   /* back to tty in */
                                using=0;
                                printf("smpsim> ");
                                continue;  /*go read input again */
                        } else break;  /*quit */
                }
		p=getword(line);
		switch(*p){
		  case 'c':
			p=nxtword();
			if (*p == 0) {
				logging=0;
				printf(" logging disabled\n");
				break;
			}
			logging=1;
			strcpy(logfile,p);
			printf(" logging to %s\n",logfile);
			break;
		  case 'h':
		  case 'm':
			p=nxtword();
			if (filchk(p)) break;
			strcpy(node[MAXNODES].nt_filnam,p);
			break;
		  case 'l':
			n= -1;
			inpid = 0;
			p=nxtword();
			while(*p){
                          if (*p == '-')switch (*++p){
                           case 'c':
                                CubDim=atoi(nxtword());
                                if (CubDim<0 || CubDim>MAXDIM)CubDim=0;
                                printf(" dimension %d cube\n",CubDim);
                                break;
                           case 'n':
				n=atoi(nxtword());
				break;
                           case 'p':
                                inpid = atoi(nxtword());
                                break;
                           default:
                                printf("?mpsim unknown switch %s\n",*p);
                                break;
                         }  /* switch */
                         else {  /* filename */
                                if (filchk(p)) break;
                                if(n<0 || n>=(1<<CubDim))n = -1;
                                if (n== -1)  /* all nodes */
                                 for(i=0;i< 1<<CubDim;i++){
                                        strcpy(node[i].nt_filnam,p);
                                        node[i].nt_mynode = i;
                                        node[i].nt_mypid = inpid;
                                 }
                                 else {
                                        strcpy(node[n].nt_filnam,p);
                                        node[n].nt_mynode = n;
                                        node[n].nt_mypid = inpid;
                                 }
                                break;
                         }        
                         p = nxtword();
                        }  /* while still words */
			break;
		  case 's':
			return;  /* start em up */
		  case 't':
			p=nxtword();
			if (strcmp(p,"on")==0) trace=logging=1;
			 else trace=0;
			if (trace) printf(" trace and logging on to %s\n",
				logfile);
			  else printf(" trace off\n");
			break;
		  case 'q':
			exit(0);
		  case 'u':
                        p = nxtword();
                        if (using) {
                        printf("Can't nest command files\n");
                        break;
                        }
                        if ((in = fopen(p,"r"))==NULL){
                         printf("Can't open %s\n",p);
                         in =stdin;
                         break;
                        }
                        using =1;
                        break;
		  default:
			printf(" mpsim commands are:\n");
 printf("\tc [logfile]			cubelog to fileor disable\n");
 printf("\th file				load file to host\n");
 printf("\tl [-c dim][-n node][-p pid][file]    load file to nodes cubedim\n");
 printf("\ts				start\n");
 printf("\tt [on]				trace on or off\n");
 printf("\tu file                               use file for input\n");
 printf("\tq				quit\n");
			break;
		}
		if (!using) printf("mpsim> ");
	}
	exit(0);
}

filchk(s)
char *s;
{
	if(access(s,5)==0)return(0);
	printf("Unable to access %s\n",s);
	return(1);
}
static char *nxtwdp, word_str[100];  /*locals for getword and nxtword*/

char *getword(s)
char *s;
{
	char *p;

	while( *s && isspace(*s))s++;
	p=word_str;
	while(*s && !isspace(*s)) *p++ = *s++;
	*p=0;
	nxtwdp=s;
	return(word_str);
}

char *nxtword()
{
	char *p;

	while(*nxtwdp && isspace(*nxtwdp)) nxtwdp++;
	p=word_str;
	while(*nxtwdp && !isspace(*nxtwdp)) *p++ = *nxtwdp++;
	*p=0;
	return(word_str);
}

SHAR_EOF
if test -f 'f286toc.f'
then
	echo shar: over-writing existing file "'f286toc.f'"
fi
cat << \SHAR_EOF > 'f286toc.f'
$interface(C=_cubedim,_syslog,_copen,_cclose,_probe,_status,_mynode)
$interface(C=_clock,_flick,_send,_sendw,_sendmsg,_recv,_recvw,_recvmsg)

C  f286toc.f
C   fortran interface to ornl simulator -- Xenix on Intel

      integer function cubedim()
      integer _cubedim
      cubedim = _cubedim()
      end

      subroutine syslog(i,j)
      character*(*) j
      character*200 tmp
      equivalence (k,tmp)
      print *,len(j)
      tmp = j // char(0)
      call _syslog(%val(i),k)
      end

	integer function copen(pid)
	integer pid,_copen
	copen = _copen(%val(pid))
	end

	subroutine cclose(p)
	integer p
	call _cclose(%val(p))
	end

	subroutine sendw(a,b,c,d,e,f)
	integer a,b,c,d,e,f
	call _sendw(%val(a),%val(b),c,%val(d),%val(e),%val(f))
	end

	subroutine sendmsg(a,b,c,d,e,f)
	integer a,b,c,d,e,f
	call _sendmsg(%val(a),%val(b),c,%val(d),%val(e),%val(f))
	end

	subroutine send(a,b,c,d,e,f)
	integer a,b,c,d,e,f
	call _send(%val(a),%val(b),c,%val(d),%val(e),%val(f))
	end

	subroutine recvw(a,b,c,d,e,f,g)
	integer a,b,c,d,e,f,g
	call _recvw(%val(a),%val(b),c,%val(d),e,f,g)
	end

	subroutine recv(a,b,c,d,e,f,g)
	integer a,b,c,d,e,f,g
	call _recv(%val(a),%val(b),c,%val(d),e,f,g)
	end

	subroutine recvmsg(a,b,c,d,e,f,g)
	integer a,b,c,d,e,f,g
	call _recvmsg(%val(a),b,c,%val(d),e,f,g)
	end

	integer function probe(a,b)
	integer a,b,_probe
	probe = _probe(%val(a),%val(b))
	end

	integer function status(p)
	integer p,_status
	status = _status(%val(p))
	end

	integer function mynode()
	integer _mynode
	mynode = _mynode()
	end

	integer*4 function clock()
	integer*4 _clock
	clock = _clock()
	end

	subroutine flick()
	call _flick()
	end
SHAR_EOF
if test -f 'seqftn.f'
then
	echo shar: over-writing existing file "'seqftn.f'"
fi
cat << \SHAR_EOF > 'seqftn.f'
$SYSTEM
c
c  fortran interface for sequent fortran for mpsim
c   compile with -e
c   this interface rearranges arguments and calls c interface
c    routines which will fix call by value etc.
c
	integer function copen(pid)
	integer pid,_copen_
	copen = _copen_(pid)
	end

	subroutine cclose(p)
	integer p
	call _cclose_(p)
	end

	subroutine sendw(a,b,c,d,e,f)
	integer a,b,c,d,e,f
	call _sendw_(f,e,d,c,b,a)
	end

	subroutine sendmsg(a,b,c,d,e,f)
	integer a,b,c,d,e,f
	call _sendmsg_(f,e,d,c,b,a)
	end

	subroutine send(a,b,c,d,e,f)
	integer a,b,c,d,e,f
	call _send_(f,e,d,c,b,a)
	end

	subroutine recvw(a,b,c,d,e,f,g)
	integer a,b,c,d,e,f,g
	call _recvw_(g,f,e,d,c,b,a)
	end

	subroutine recv(a,b,c,d,e,f,g)
	integer a,b,c,d,e,f,g
	call _recv_(g,f,e,d,c,b,a)
	end

	subroutine recvmsg(a,b,c,d,e,f,g)
	integer a,b,c,d,e,f,g
	call _recvmsg_(g,f,e,d,c,b,a)
	end

	integer function cubedim()
	integer _cubedim_
	cubedim = _cubedim_()
	end

	integer function probe(a,b)
	integer a,b,_probe_
	probe = _probe_(b,a)
	end

	integer function probemsg(a)
	integer a,_probemsg_
	probemsg = _probemsg_(a)
	end

	integer function status(p)
	integer p,_status_
	status = _status_(p)
	end

	integer function mynode()
	integer _mynode_
	mynode = _mynode_()
	end

	integer function mypid()
	integer _mypid_
	mypid = _mypid_()
	end

	integer function clock()
	integer _clock_
	clock = _clock_()
	end

	subroutine flick()
	call _flick_()
	end

	subroutine syslog(a,b,c)
	integer a,c,i
	integer*1 b(*),tmp(200)
	do 10 i=1,iaddr(c)
10	tmp(i)= b(i)
	tmp(iaddr(c)+1)=0
	call _syslog_(tmp,a)
	end

C   intel version 2 stuff

	integer function myhost()
	integer _myhost_
	myhost = _myhost_()
	end

	integer function mclock()
	integer _mclock_
	mclock = _mclock_()
	end

	integer function infonode()
	integer _infonode_
	infonode = _infonode_()
	end

	integer function infopid()
	integer _infopid_
	infopid = _infopid_()
	end

	integer function infotype()
	integer _infotype_
	infotype = _infotype_()
	end

	integer function infocount()
	integer _infocount_
	infocount = _infocount_()
	end

	integer function numnodes()
	integer _numnodes_
	numnodes = _numnodes_()
	end

	integer function nodedim()
	integer _nodedim_
	nodedim = _nodedim_()
	end

	subroutine crecv(a,b,c)
	integer a,b,c
	call _crecv_(c,b,a)
	end

	subroutine csend(a,b,c,d,e)
	integer a,b,c,d,e
	call _csend_(e,d,c,b,a)
	end

	integer function isend(a,b,c,d,e)
	integer _isend_,a,b,c,d,e
	isend = _isend_(e,d,c,b,a)
	end

	integer function irecv(a,b,c)
	integer _irecv_,a,b,c
	irecv = _irecv_(c,b,a)
	end

	integer function msgwait(p)
	integer p,_msgwait_
	msgwait = _msgwait_(p)
	end

	integer function msgdone(p)
	integer p,_msgdone_
	msgdone = _msgdone_(p)
	end

	integer function iprobe(p)
	integer p,_iprobe_
	iprobe = _iprobe_(p)
	end

	subroutine setpid(p)
	integer p
	call _setpid_(p)
	end
SHAR_EOF
cd ..
if test -f 'cholhv2.c'
then
	echo shar: over-writing existing file "'cholhv2.c'"
fi
cat << \SHAR_EOF > 'cholhv2.c'

#include "chol.h"

/* Message types used (defined in globals.h):
   I : problem info
   M : map
   B : right hand side vector
*/

/* Order of matrix is n.  Columns of matrix and components of rhs and solution
   are numbered 0, 1, ... , n-1.  Nodes are numbered 0, 1, ..., np-1.
*/

main()
/* host process */
{
	CHNL ci ;
	REAL ti[5], mn[5], mx[5] ;
	int np, n, whichmap, whichord, whichcomm, maxblk, blksize,
		i, j, type, cnt, node, pid, info[4] ;
	int *map ;
	static char *ordering[] = { "natural", "gray" } ;
	static char *mapping[] = { "wrap", "reflect" } ;
	static char *commun[] = { "bcube", "bcast", "ring" } ;


	setpid(PID);
	while (1)
	{
	printf("Enter number of processors:") ;
	scanf("%d", &np) ;
	printf("Enter order of matrix:") ;
	scanf("%d", &n) ;
	if ( np == 0 || n == 0 ) break ;
	if ( n < np ) break ;
	maxblk = n/np + ( n%np ? 1 : 0 ) ;
	printf("Enter wrap(0) or reflect(1) mapping:") ;
	scanf("%d", &whichmap) ;
	printf("Enter natural(0) or gray(1) ordering:") ;
	scanf("%d", &whichord) ;
	printf("Enter blocksize (1-%d):", maxblk) ;
	scanf("%d", &blksize) ;
	printf("Enter bcube(0), bcast(1), or ring(2) communication:") ;
	scanf("%d", &whichcomm) ;
	printf("Number of processors = %d, order of matrix = %d\n", np, n) ;
	printf("%s mapping, %s ordering, blocksize = %d, %s communication\n",
		mapping[whichmap], ordering[whichord], blksize,
		commun[whichcomm]) ;

	info[0] = np ;
	info[1] = n ;
	info[2] = whichord ;
	info[3] = whichcomm ;
	csend (  I, info, 4*sizeof(int), 0, PID ) ;

	map = (int *)malloc(n*sizeof(int)) ;
	whichmap = (whichmap<<1) + whichord ;
	setmap ( whichmap, map, n, np, blksize ) ;
	csend (  M, map, n*sizeof(int), 0, PID ) ;

	for ( j = 0 ; j < 5 ; j++ )
	{
		mn[j] = 1.0e+20 ;
		mx[j] = -1.0e+20 ;
	}
	for ( i = 0 ; i < np ; i++ )
	{
		crecv(  -1, ti, 5*sizeof(float));
		node = infonode();
		printf("node %d, fac =%7.2f, fslv =%7.2f, bslv =%7.2f",
			node, (ti[1]-ti[0])/1000, (ti[2]-ti[1])/1000,
			(ti[3]-ti[2])/1000) ;
		printf(" maxerr = %e\n", ti[4] ) ;
		for ( j = 1 ; j < 5 ; j++ )
		{
			if (j==4)mx[j] = ti[j] > mx[j] ? ti[j] : mx[j] ;
			else mx[j-1] = (ti[j]-ti[j-1])> mx[j-1] ? 
			  (ti[j]-ti[j-1]) : mx[j-1];
		}
	}
	printf("totals , fac =%7.2f, fslv =%7.2f, bslv =%7.2f",
		mx[0]/1000, mx[1]/1000, mx[2]/1000) ;
	printf(" maxerr = %e\n", mx[4] ) ;

	free(map) ;
	}

}

setmap ( whichmap, map, n, np, blksize )
	int whichmap, *map, n, np, blksize;
{
	int i, gray();
	switch (whichmap)
	{ 
	case 0:                                                       /* wrap */
		for ( i = 0 ; i < n ; i++ ) map[i] = (i/blksize)%np;
		break;
	case 1:                                                  /* gray wrap */
		for ( i = 0 ; i < n ; i++ ) map[i] = gray((i/blksize)%np);
		break;
	case 2:                                                    /* reflect */
		for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2
			? np-1 - (i/blksize)%np
			: (i/blksize)%np;
		break;
	case 3:                                               /* gray reflect */
		for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2
			? gray(np-1 - (i/blksize)%np)
			: gray((i/blksize)%np);
		break;
	}
}

int gray (i)
    int i ;
{ 
   return( (i>>1)^i ) ;
}
SHAR_EOF
if test -f 'cholnv2.c'
then
	echo shar: over-writing existing file "'cholnv2.c'"
fi
cat << \SHAR_EOF > 'cholnv2.c'

#include "chol.h"

main()
/* node process */
{
	CHNL ci ;
	long mclock() ;
	REAL ti[5], maxerr ;
	int n, i, j, k, maxlth, ncols, np, me, cnt, node, pid, 
		whichord, whichcomm, info[4] ;
	int *map, *collth, *mycols ;
	REAL *b, **col, *colk, *p, *q, *y ;
	REAL x, t ;

	me = mynode() ;

	while (1)
	{

/* receive problem data */

	crecv (  I, info, 4*sizeof(int));
	np = info[0] ;
	n = info[1] ;
	whichord = info[2] ;
	whichcomm = info[3] ;
	if ( me == 0 ) for ( i = 1 ; i < np ; i++ )
		csend (  I, info, 4*sizeof(int), i, PID ) ;
	if ( np == 0 || n == 0 ) break ;
	maxlth = n*sizeof(REAL) ;
	map = (int *)malloc(n*sizeof(int)) ;
	crecv (  M, map, n*sizeof(int));
	if ( me == 0 ) for ( i = 1 ; i < np ; i++ )
		csend (  M, map, n*sizeof(int), i, PID ) ;
	ncols = 0 ;
	for ( i = 0 ; i < n; i++ ) if ( map[i] == me ) ncols++ ;

/* allocate storage */

	collth = (int *)malloc(ncols*sizeof(int)) ;
	mycols = (int *)malloc(ncols*sizeof(int)) ;
	b = (REAL *)malloc(n*sizeof(REAL)) ;
	col = (REAL * *)malloc(ncols*sizeof(REAL *)) ;
	colk = (REAL *)malloc(n*sizeof(REAL)) ;
	y = (REAL *)malloc(ncols*sizeof(REAL)) ;

/* set up data structures for problem to be solved */

	j = 0 ;
	for ( k = 0 ; k < n ; k++ )
		if ( map[k] == me )
		{  
			p = (REAL *)malloc((n-k)*sizeof(REAL)) ;
			col[j] = p ;
			mycols[j] = k ;
			collth[j] = n-k ;
			j++ ;
			*p++ = 2 ;
			if ( n-k > 1 ) *p++ = -1 ;
			for ( i = 2 ; i < n-k ; i++ ) *p++ = 0 ;
		}
		for ( i = 0 ; i < n ; i++ ) b[i] = 0 ;
		b[n-1] = n+1 ;

	maxerr = 0 ;
	ti[0] = mclock(0) ;

/* Cholesky factorization */

	j = 0 ;
	if ( map[0] == me )
	{
		t = 1.0/sqrt(*col[j]) ;
		for ( p = col[j] ; p < col[j] + collth[j] ; p++ )
			*p *= t ;
		j++ ;
		comm ( ci, 0, col[0], n*sizeof(REAL), map[0], np,
			FORWARD, whichord, whichcomm ) ;
	}
	for ( k = 0 ; k < n ; k++ )
	{
		crecv(  k, colk, maxlth);
		cnt=infocount();
		if ( whichcomm != 1 && me != map[k] )
			comm ( ci, k, colk, cnt, map[k], np,
				FORWARD, whichord, whichcomm ) ;
		for ( i = j ; i < ncols ; i++ )
		{
			for ( p = col[i] , q = colk + mycols[i]-k , t = *q ;
				p < col[i] + collth[i] ; p++ , q++ )
				*p -= *q * t ;
			if ( k == mycols[j]-1 )
			{
				t = 1.0/sqrt(*col[j]) ;
				for ( p = col[j] ; p < col[j] + collth[j] ;
					p++ ) *p *= t ;
				comm ( ci, mycols[j], col[j],
					collth[j]*sizeof(REAL), me, np,
					FORWARD, whichord, whichcomm ) ;
				j++ ;
			}
		}
	}

	ti[1] = mclock(0) ;

/* forward substitution */

	j = 0 ;
	for ( k = 0 ; k < n ; k++ )
	{
		if ( map[k] == me )
		{
			if ( k > 0 ) crecv (  B, b, maxlth);
			y[j] = b[k] / *col[j] ;
			for ( i = k+1 ; i < n ; i++ )
				b[i] -= y[j] * *(col[j]+i-k) ;
			if ( k < n-1 ) csend (  B, b, n*sizeof(REAL),
				map[k+1], PID ) ;
			j++ ;
		}
	}	

	ti[2] = mclock(0) ;

/* back substitution */

	j = ncols-1 ;
	if ( map[n-1] == me )
	{
		x = y[j] / *col[j] ;
		if ( maxerr < fabs(x-n) )
			maxerr = fabs(x-n) ;
		j-- ;
		comm ( ci, n-1, &x, sizeof(REAL), map[n-1], np,
			BACKWARD, whichord, whichcomm ) ;
	}
	for ( k = n-1 ; k >= 0 ; k-- )
	{
		crecv(  k, &x, sizeof(REAL));
		cnt=infocount();
		if ( whichcomm != 1 && me != map[k] )
			comm ( ci, k, &x, cnt, map[k], np,
				BACKWARD, whichord, whichcomm ) ;
		for ( i = j ; i >= 0 ; i-- )
		{
			y[i] -= x * *(col[i]+k-mycols[i]) ;
			if ( k == mycols[j]+1 )
			{
				x = y[j] / *col[j] ;
				if ( maxerr < fabs(x-k) )
					maxerr = fabs(x-k) ;
				comm ( ci, mycols[j], &x, sizeof(REAL), me, np,
					BACKWARD, whichord, whichcomm ) ;
				j-- ;
			}
		}
	}

	ti[3] = mclock(0) ;
	ti[4] = maxerr ;
	csend (  0, ti, 5*sizeof(float), HOST, PID ) ;

/* free storage */

	for ( j = 0 ; j < ncols ; j++ ) free(col[j]) ;
	free(y) ;
	free(colk) ;
	free(col) ;
	free(b) ;
	free(mycols) ;
	free(collth) ;
	free(map) ;
	}

}

comm ( ci, msgtype, vec, bytes, root, np, dir, ord, which )
	CHNL ci ;
	int msgtype, bytes, root, np, dir, ord, which ;
	char *vec ;
/*
 *  Communication driver
 */

{
	switch (which)
	{
	case 0:
		bcube( ci, msgtype, vec, bytes, root, np ) ;
		break ;
	case 1:
		bcast( ci, msgtype, vec, bytes, root, np, dir, ord ) ;
		break ;
	case 2:
		ring( ci, msgtype, vec, bytes, root, np, dir ) ;
		break ;
	}
}

bcube ( ci, msgtype, vec, bytes, root, np )
	CHNL ci ;
	int msgtype, bytes, root, np ;
	char *vec ;
/*
 *  Broadcast vector, vec, of length bytes
 *  to all processors using a minimum spanning
 *  tree with given root.
 */
{
	int i, me, cnt, node, pid ;

	me = mynode()^root ;
	for ( i = np/2 ; i > me ; i /= 2 )
		csend ( msgtype, vec, bytes, (me+i)^root, PID ) ;
	if ( me == 0 )
		csend (  msgtype, vec, bytes, root, PID ) ;
}

bcast ( ci, msgtype, vec, bytes, root, np, dir, ord )
	CHNL ci ;
	int msgtype, bytes, root, np, dir, ord ;
	char *vec ;
/*
 *  Broadcast vector, vec, of length bytes
 */

{
	int i, cnt, node, pid, gray(), invgray() ;

	switch (ord)
	{
	case NATURAL:
		if ( dir == FORWARD )
		{
			for ( i = 0 ; i < np ; i++ )
				csend ( msgtype, vec, bytes,
					(root+i)%np, PID ) ;
		}
		else
		{
			for ( i = 0 ; i < np ; i++ )
				csend ( msgtype, vec, bytes,
					(root+np-i)%np, PID ) ;
		}
		break ;
	case GRAY:
		if ( dir == FORWARD )
		{
			for ( i = 0 ; i < np ; i++ )
			{
				csend (  msgtype, vec, bytes,
					succ(root,i,np), PID ) ;
			}
		}
		else
		{
			for ( i = 0 ; i < np ; i++ )
				csend ( msgtype, vec, bytes,
					pred(root,i,np), PID ) ;
		}
		break ;
	}
}

ring ( ci, msgtype, vec, bytes, root, np, dir )
	CHNL ci ;
	int msgtype, bytes, root, np, dir ;
	char *vec ;
/*
 *  Send vector, vec, of length bytes to
 *  all processors, using an embedded ring 
 *  beginning at root.
 */
{
	int me, next, cnt, node, pid, pred(), succ() ;

	me = mynode() ;
	switch (dir)
	{
	case FORWARD:
		csend ( msgtype, vec, bytes, succ(me,1,np), PID ) ;
		break ;
	case BACKWARD:
		csend ( msgtype, vec, bytes, pred(me,1,np), PID ) ;
		break ;
	}
}

int pred ( node, i, np )
	int node, i, np ;
{
	int gray(), invgray() ;
	return( gray((invgray(node)+np-i)%np) ) ;
}

int succ ( node, i, np )
	int node, i, np ;
{
	int gray(), invgray() ;
	return( gray((invgray(node)+i)%np) ) ;
}

int gray (i)
    int i ;
{ 
   return( (i>>1)^i ) ;
}

int invgray (i)
    int i ;
{ 
   int k ;
   k = i ;
   while ( k > 0 ) 
	 {
	   k = k>>1 ;
	   i = i^k ;
	 }
   return (i) ;
}

SHAR_EOF
if test -f 'cholh.c'
then
	echo shar: over-writing existing file "'cholh.c'"
fi
cat << \SHAR_EOF > 'cholh.c'

#include "chol.h"

/* Message types used (defined in globals.h):
   I : problem info
   M : map
   B : right hand side vector
*/

/* Order of matrix is n.  Columns of matrix and components of rhs and solution
   are numbered 0, 1, ... , n-1.  Nodes are numbered 0, 1, ..., np-1.
*/

main()
/* host process */
{
	CHNL ci ;
	REAL ti[5], mn[5], mx[5] ;
	int np, n, whichmap, whichord, whichcomm, maxblk, blksize,
		i, j, type, cnt, node, pid, info[4] ;
	int *map ;
	static char *ordering[] = { "natural", "gray" } ;
	static char *mapping[] = { "wrap", "reflect" } ;
	static char *commun[] = { "bcube", "bcast", "ring" } ;

	ci = copen(PID) ;

	while (1)
	{
	printf("Enter number of processors:") ;
	scanf("%d", &np) ;
	printf("Enter order of matrix:") ;
	scanf("%d", &n) ;
	if ( np == 0 || n == 0 ) break ;
	if ( n < np ) break ;
	maxblk = n/np + ( n%np ? 1 : 0 ) ;
	printf("Enter wrap(0) or reflect(1) mapping:") ;
	scanf("%d", &whichmap) ;
	printf("Enter natural(0) or gray(1) ordering:") ;
	scanf("%d", &whichord) ;
	printf("Enter blocksize (1-%d):", maxblk) ;
	scanf("%d", &blksize) ;
	printf("Enter bcube(0), bcast(1), or ring(2) communication:") ;
	scanf("%d", &whichcomm) ;
	printf("Number of processors = %d, order of matrix = %d\n", np, n) ;
	printf("%s mapping, %s ordering, blocksize = %d, %s communication\n",
		mapping[whichmap], ordering[whichord], blksize,
		commun[whichcomm]) ;

	info[0] = np ;
	info[1] = n ;
	info[2] = whichord ;
	info[3] = whichcomm ;
	sendmsg ( ci, I, info, 4*sizeof(int), 0, PID ) ;

	map = (int *)malloc(n*sizeof(int)) ;
	whichmap = (whichmap<<1) + whichord ;
	setmap ( whichmap, map, n, np, blksize ) ;
	sendmsg ( ci, M, map, n*sizeof(int), 0, PID ) ;

	for ( j = 0 ; j < 5 ; j++ )
	{
		mn[j] = 1.0e+20 ;
		mx[j] = -1.0e+20 ;
	}
	for ( i = 0 ; i < np ; i++ )
	{
		recvmsg( ci, &type, ti, 5*sizeof(float), &cnt, &node, &pid ) ;
		printf("node %d, fac =%7.2f, fslv =%7.2f, bslv =%7.2f",
			node, (ti[1]-ti[0])/1000, (ti[2]-ti[1])/1000,
			(ti[3]-ti[2])/1000) ;
		printf(" maxerr = %e\n", ti[4] ) ;
		for ( j = 0 ; j < 5 ; j++ )
		{
			mn[j] = ti[j] < mn[j] ? ti[j] : mn[j] ;
			mx[j] = ti[j] > mx[j] ? ti[j] : mx[j] ;
		}
	}
	printf("totals , fac =%7.2f, fslv =%7.2f, bslv =%7.2f",
		(mx[1]-mn[0])/1000, (mx[2]-mn[1])/1000, (mx[3]-mn[2])/1000) ;
	printf(" maxerr = %e\n", mx[4] ) ;

	free(map) ;
	}

	cclose(ci) ;
}

setmap ( whichmap, map, n, np, blksize )
	int whichmap, *map, n, np, blksize;
{
	int i, gray();
	switch (whichmap)
	{ 
	case 0:                                                       /* wrap */
		for ( i = 0 ; i < n ; i++ ) map[i] = (i/blksize)%np;
		break;
	case 1:                                                  /* gray wrap */
		for ( i = 0 ; i < n ; i++ ) map[i] = gray((i/blksize)%np);
		break;
	case 2:                                                    /* reflect */
		for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2
			? np-1 - (i/blksize)%np
			: (i/blksize)%np;
		break;
	case 3:                                               /* gray reflect */
		for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2
			? gray(np-1 - (i/blksize)%np)
			: gray((i/blksize)%np);
		break;
	}
}

int gray (i)
    int i ;
{ 
   return( (i>>1)^i ) ;
}
SHAR_EOF
if test -f 'choln.c'
then
	echo shar: over-writing existing file "'choln.c'"
fi
cat << \SHAR_EOF > 'choln.c'

#include "chol.h"

main()
/* node process */
{
	CHNL ci ;
	long clock() ;
	REAL ti[5], maxerr ;
	int n, i, j, k, maxlth, ncols, np, me, cnt, node, pid, 
		whichord, whichcomm, info[4] ;
	int *map, *collth, *mycols ;
	REAL *b, **col, *colk, *p, *q, *y ;
	REAL x, t ;

	me = mynode() ;
	ci = copen(PID) ;

	while (1)
	{

/* receive problem data */

	recvw ( ci, I, info, 4*sizeof(int), &cnt, &node, &pid ) ;
	np = info[0] ;
	n = info[1] ;
	whichord = info[2] ;
	whichcomm = info[3] ;
	if ( me == 0 ) for ( i = 1 ; i < np ; i++ )
		send ( ci, I, info, 4*sizeof(int), i, PID ) ;
	if ( np == 0 || n == 0 ) break ;
	maxlth = n*sizeof(REAL) ;
	map = (int *)malloc(n*sizeof(int)) ;
	recvw ( ci, M, map, n*sizeof(int), &cnt, &node, &pid ) ;
	if ( me == 0 ) for ( i = 1 ; i < np ; i++ )
		send ( ci, M, map, n*sizeof(int), i, PID ) ;
	ncols = 0 ;
	for ( i = 0 ; i < n; i++ ) if ( map[i] == me ) ncols++ ;

/* allocate storage */

	collth = (int *)malloc(ncols*sizeof(int)) ;
	mycols = (int *)malloc(ncols*sizeof(int)) ;
	b = (REAL *)malloc(n*sizeof(REAL)) ;
	col = (REAL * *)malloc(ncols*sizeof(REAL *)) ;
	colk = (REAL *)malloc(n*sizeof(REAL)) ;
	y = (REAL *)malloc(ncols*sizeof(REAL)) ;

/* set up data structures for problem to be solved */

	j = 0 ;
	for ( k = 0 ; k < n ; k++ )
		if ( map[k] == me )
		{  
			p = (REAL *)malloc((n-k)*sizeof(REAL)) ;
			col[j] = p ;
			mycols[j] = k ;
			collth[j] = n-k ;
			j++ ;
			*p++ = 2 ;
			if ( n-k > 1 ) *p++ = -1 ;
			for ( i = 2 ; i < n-k ; i++ ) *p++ = 0 ;
		}
		for ( i = 0 ; i < n ; i++ ) b[i] = 0 ;
		b[n-1] = n+1 ;

	maxerr = 0 ;
	ti[0] = clock(0) ;

/* Cholesky factorization */

	j = 0 ;
	if ( map[0] == me )
	{
		t = 1.0/sqrt(*col[j]) ;
		for ( p = col[j] ; p < col[j] + collth[j] ; p++ )
			*p *= t ;
		j++ ;
		comm ( ci, 0, col[0], n*sizeof(REAL), map[0], np,
			FORWARD, whichord, whichcomm ) ;
	}
	for ( k = 0 ; k < n ; k++ )
	{
		recvw( ci, k, colk, maxlth, &cnt, &node, &pid ) ;
		if ( whichcomm != 1 && me != map[k] )
			comm ( ci, k, colk, cnt, map[k], np,
				FORWARD, whichord, whichcomm ) ;
		for ( i = j ; i < ncols ; i++ )
		{
			for ( p = col[i] , q = colk + mycols[i]-k , t = *q ;
				p < col[i] + collth[i] ; p++ , q++ )
				*p -= *q * t ;
			if ( k == mycols[j]-1 )
			{
				t = 1.0/sqrt(*col[j]) ;
				for ( p = col[j] ; p < col[j] + collth[j] ;
					p++ ) *p *= t ;
				comm ( ci, mycols[j], col[j],
					collth[j]*sizeof(REAL), me, np,
					FORWARD, whichord, whichcomm ) ;
				j++ ;
			}
		}
	}

	ti[1] = clock(0) ;

/* forward substitution */

	j = 0 ;
	for ( k = 0 ; k < n ; k++ )
	{
		if ( map[k] == me )
		{
			if ( k > 0 ) recvw ( ci, B, b, maxlth, &cnt, &node,
				&pid ) ;
			y[j] = b[k] / *col[j] ;
			for ( i = k+1 ; i < n ; i++ )
				b[i] -= y[j] * *(col[j]+i-k) ;
			if ( k < n-1 ) send ( ci, B, b, n*sizeof(REAL),
				map[k+1], PID ) ;
			j++ ;
		}
	}	

	ti[2] = clock(0) ;

/* back substitution */

	j = ncols-1 ;
	if ( map[n-1] == me )
	{
		x = y[j] / *col[j] ;
		if ( maxerr < fabs(x-n) )
			maxerr = fabs(x-n) ;
		j-- ;
		comm ( ci, n-1, &x, sizeof(REAL), map[n-1], np,
			BACKWARD, whichord, whichcomm ) ;
	}
	for ( k = n-1 ; k >= 0 ; k-- )
	{
		recvw( ci, k, &x, sizeof(REAL), &cnt, &node, &pid ) ;
		if ( whichcomm != 1 && me != map[k] )
			comm ( ci, k, &x, cnt, map[k], np,
				BACKWARD, whichord, whichcomm ) ;
		for ( i = j ; i >= 0 ; i-- )
		{
			y[i] -= x * *(col[i]+k-mycols[i]) ;
			if ( k == mycols[j]+1 )
			{
				x = y[j] / *col[j] ;
				if ( maxerr < fabs(x-k) )
					maxerr = fabs(x-k) ;
				comm ( ci, mycols[j], &x, sizeof(REAL), me, np,
					BACKWARD, whichord, whichcomm ) ;
				j-- ;
			}
		}
	}

	ti[3] = clock(0) ;
	ti[4] = maxerr ;
	send ( ci, 0, ti, 5*sizeof(float), HOST, PID ) ;

/* free storage */

	for ( j = 0 ; j < ncols ; j++ ) free(col[j]) ;
	free(y) ;
	free(colk) ;
	free(col) ;
	free(b) ;
	free(mycols) ;
	free(collth) ;
	free(map) ;
	}

	cclose(ci) ;
}

comm ( ci, msgtype, vec, bytes, root, np, dir, ord, which )
	CHNL ci ;
	int msgtype, bytes, root, np, dir, ord, which ;
	char *vec ;
/*
 *  Communication driver
 */

{
	switch (which)
	{
	case 0:
		bcube( ci, msgtype, vec, bytes, root, np ) ;
		break ;
	case 1:
		bcast( ci, msgtype, vec, bytes, root, np, dir, ord ) ;
		break ;
	case 2:
		ring( ci, msgtype, vec, bytes, root, np, dir ) ;
		break ;
	}
}

bcube ( ci, msgtype, vec, bytes, root, np )
	CHNL ci ;
	int msgtype, bytes, root, np ;
	char *vec ;
/*
 *  Broadcast vector, vec, of length bytes
 *  to all processors using a minimum spanning
 *  tree with given root.
 */
{
	int i, me, cnt, node, pid ;

	me = mynode()^root ;
	for ( i = np/2 ; i > me ; i /= 2 )
		send ( ci, msgtype, vec, bytes, (me+i)^root, PID ) ;
	if ( me == 0 )
		send ( ci, msgtype, vec, bytes, root, PID ) ;
}

bcast ( ci, msgtype, vec, bytes, root, np, dir, ord )
	CHNL ci ;
	int msgtype, bytes, root, np, dir, ord ;
	char *vec ;
/*
 *  Broadcast vector, vec, of length bytes
 */

{
	int i, cnt, node, pid, gray(), invgray() ;

	switch (ord)
	{
	case NATURAL:
		if ( dir == FORWARD )
		{
			for ( i = 0 ; i < np ; i++ )
				send ( ci, msgtype, vec, bytes,
					(root+i)%np, PID ) ;
		}
		else
		{
			for ( i = 0 ; i < np ; i++ )
				send ( ci, msgtype, vec, bytes,
					(root+np-i)%np, PID ) ;
		}
		break ;
	case GRAY:
		if ( dir == FORWARD )
		{
			for ( i = 0 ; i < np ; i++ )
			{
				send ( ci, msgtype, vec, bytes,
					succ(root,i,np), PID ) ;
			}
		}
		else
		{
			for ( i = 0 ; i < np ; i++ )
				send ( ci, msgtype, vec, bytes,
					pred(root,i,np), PID ) ;
		}
		break ;
	}
}

ring ( ci, msgtype, vec, bytes, root, np, dir )
	CHNL ci ;
	int msgtype, bytes, root, np, dir ;
	char *vec ;
/*
 *  Send vector, vec, of length bytes to
 *  all processors, using an embedded ring 
 *  beginning at root.
 */
{
	int me, next, cnt, node, pid, pred(), succ() ;

	me = mynode() ;
	switch (dir)
	{
	case FORWARD:
		send ( ci, msgtype, vec, bytes, succ(me,1,np), PID ) ;
		break ;
	case BACKWARD:
		send ( ci, msgtype, vec, bytes, pred(me,1,np), PID ) ;
		break ;
	}
}

int pred ( node, i, np )
	int node, i, np ;
{
	int gray(), invgray() ;
	return( gray((invgray(node)+np-i)%np) ) ;
}

int succ ( node, i, np )
	int node, i, np ;
{
	int gray(), invgray() ;
	return( gray((invgray(node)+i)%np) ) ;
}

int gray (i)
    int i ;
{ 
   return( (i>>1)^i ) ;
}

int invgray (i)
    int i ;
{ 
   int k ;
   k = i ;
   while ( k > 0 ) 
	 {
	   k = k>>1 ;
	   i = i^k ;
	 }
   return (i) ;
}

SHAR_EOF
if test -f 'echoh.c'
then
	echo shar: over-writing existing file "'echoh.c'"
fi
cat << \SHAR_EOF > 'echoh.c'
/* echo.c
 *   echo test to measure transmission delays
 * -r rate. -d cubedim -s startup -n repetition -l msglth  -p packetsize
 */

#include <stdio.h>

#define PID 11


#define CDIM 6
#define MSGSIZE 15000
#define REP 10
long times();
long ttemp[4];
struct PARM {
	long Reps;
	long Msglth;
	long Cdim;
};
#define reps parms.Reps
#define msglth parms.Msglth
#define cdim parms.Cdim

float atof();
	char buff[MSGSIZE];
	float total;
	float totals[CDIM+1];

main(argc,argv)
int argc;
char *argv[];
{
	int i;
	int d;
	long tmp;
	long end, start;
	float rate = 1.0;
	int startup =0;
	int psize =1;
	struct PARM parms; 


	msglth = 1024;
	cdim = cubedim();
	reps = REP;
	while (--argc > 0 && **++argv == '-')switch ((*argv)[1]) {
		case 'r':	/* rate */
			rate = atof(argv[1]);
			argc--; argv++;
			break;
		case 'n':	/* reps */
			reps = atoi(argv[1]);
			argc--; argv++;
			break;
		case 's':	/* startup */
			startup = atoi(argv[1]);
			argc--; argv++;
			break;
		case 'l':	/*  msg lth */
			msglth = atoi(argv[1]);
			argc--; argv++;
			break;
		case 'p':	/* packet size */
			psize = atoi(argv[1]);
			argc--; argv++;
			break;
		case 'd':	/* cube dimension*/
			cdim = atoi(argv[1]);
			argc--; argv++;
			break;
		default:
			break;
	}
	d = copen(PID);
	while(1){
	printf("reps lth"); scanf("%d%d",&reps, &msglth);
	printf(" repetition(n) %ld cube dimension(d) %ld  msg lth(l) %ld\n",
	 reps,cdim,msglth);

	/* now start node tests */
	sendmsg(d,1,&parms,sizeof(struct PARM),0,PID);
	recvmsg(d,&tmp,totals,sizeof(totals),&tmp,&tmp,&tmp);
	for (i=0;i<cdim+1;i++)
	 printf("node 0 hop of %d average (roundtrip) time  %g sec %g KBs \n",
	  i,totals[i]/(reps*1000),reps*2.0*msglth/totals[i]);
	printf("tests complete\n");
	}
}
SHAR_EOF
if test -f 'echon.c'
then
	echo shar: over-writing existing file "'echon.c'"
fi
cat << \SHAR_EOF > 'echon.c'
/*  echon.c    run with echoh to do roundtrip timing tests */

#define PID 11
#define HOST 0x08000
#define CDIM 6
#define MSGSIZE 15000
struct PARM {
	long Reps;
	long Msglth;
	long Cdim;
};
#define reps parms.Reps
#define msglth parms.Msglth
#define cdim parms.Cdim
char buff[MSGSIZE];
long clock();

main()
{
	/* do node to node tests */
	long tmp;
	int d;
	int lth;
	int srcnode, srcpid;
	long start;
	int i, dim, n;
	float totals[CDIM+1];
	struct PARM parms;

	d = copen(PID);
if (mynode() == 0)
    while(1){
	/* wait for host to say we can start */
	recvw(d,1,&parms,sizeof(struct PARM),&tmp,&srcnode,&srcpid);
	n=0;
	for(dim=0;dim<cdim+1;dim++){
		totals[dim] = 0.;
		start = clock();
		for(i=0;i<reps;i++){
			sendw(d,1,buff,(int)msglth,n,PID);
			recvw(d,1,buff,(int)msglth,&tmp,&tmp,&tmp);
		}
		totals[dim] = clock()-start;
		n = (n<<1) + 1;
	}
	sendw(d,1,totals,sizeof(totals),HOST,PID);  /* tell em we are done */
	syslog(PID,"node 0 restart");
  }

 else
	/* if not node 0 just echo back whatever we get */

	while(1){
		recvw(d,1,buff,MSGSIZE,&lth,&srcnode,&srcpid);
		sendw(d,1,buff,lth,srcnode,srcpid);
	}
}
SHAR_EOF
if test -f 'iphfv2.f'
then
	echo shar: over-writing existing file "'iphfv2.f'"
fi
cat << \SHAR_EOF > 'iphfv2.f'
c iphf.f  host
	implicit integer (a-z)
c do simple inner product with snd rcv
	integer matrix(4,4),vector(4)
	integer result(4)
	data matrix /1,2,3,4,2,3,1,0,3,3,1,2,4,3,2,1/
	data vector /2,3,1,4/

	call setpid(0)
	do 10 i=1,4
		call csend(1,matrix(1,i),16,i-1,0)
		call csend(2,vector,16,i-1,0)
10	continue
	do 20 i=1,4
		call crecv(-1,val,4)
		node = infonode()
		result(node+1) = val
20	continue
	write(*,*)result
	end
SHAR_EOF
if test -f 'iph.c'
then
	echo shar: over-writing existing file "'iph.c'"
fi
cat << \SHAR_EOF > 'iph.c'
/* iph.c  vector matrix(transpose) inner product using messages */

#include <stdio.h>

#define DIM 4

int vector[DIM] = {2,3,1,4};
int matrix[DIM][DIM] = { 1,2,3,4,
			 2,3,1,0,
			 3,3,1,2,
			 4,3,2,1
			 };
int result[DIM];

main()
{
	/* main task */
	int d;
	int i;
	int val;
	int type,lth,node,pid;

	d = copen(15);
	for (i=0;i<DIM;i++){	   /* start and send data to each node */
		sendmsg(d,1,matrix[i],sizeof(matrix[i]),i,15);
		sendmsg(d,2,vector,sizeof(vector),i,15);
	}
	i=DIM;
	while(i--){   /* wait for results */
		recvmsg(d,&type,&val,sizeof(int),&lth,&node,&pid);
		result[node] = val;
	}

	syslog(3,"a host message");
	for(i=0;i<DIM;i++) printf(" %d",result[i]);
	printf(" (should be 27 14 24 23)\n");
}

SHAR_EOF
if test -f 'iphg.c'
then
	echo shar: over-writing existing file "'iphg.c'"
fi
cat << \SHAR_EOF > 'iphg.c'
/* iph.c  vector matrix(transpose) inner product using messages */

#include <stdio.h>

#define DIM 4

int vector[DIM] = {2,3,1,4};
int matrix[DIM][DIM] = { 1,2,3,4,
			 2,3,1,0,
			 3,3,1,2,
			 4,3,2,1
			 };
int result[DIM];

main()
{
	/* main task */
	int d;
	int i;
	int val;
	int type,lth,node,pid;

	d = copen(15);
		   /* start and send data to each node */
	sendmsg(d,1,matrix[i],sizeof(matrix[i]),-1,15); /* global send */
	sendmsg(d,2,vector,sizeof(vector),-1,15);
	i=DIM;
	while(i--){   /* wait for results */
		recvmsg(d,&type,&val,sizeof(int),&lth,&node,&pid);
		result[node] = val;
	}

	syslog(3,"a host message");
	for(i=0;i<DIM;i++) printf(" %d",result[i]);
	printf(" (should be 27 14 24 23)\n");
}

SHAR_EOF
if test -f 'ipn.c'
then
	echo shar: over-writing existing file "'ipn.c'"
fi
cat << \SHAR_EOF > 'ipn.c'
#define DIM 5
main()
{
	/* do inner product of two vectors */
	int v1[DIM], v2[DIM];
	int i, sum;
	int d;
	int me;
	int node,pid,lth;

	me=mynode();
	d = copen(15);
	recvw(d,1,v1,sizeof v1,&lth,&node,&pid);
	recvw(d,2,v2,sizeof v2,&lth,&node,&pid);
	sum=0;
	for(i=0;i< (lth/ sizeof(int));i++) sum += v1[i] * v2[i];
	sendw(d,3,&sum,sizeof(int),node,pid);
}
SHAR_EOF
if test -f 'nstats.c'
then
	echo shar: over-writing existing file "'nstats.c'"
fi
cat << \SHAR_EOF > 'nstats.c'
/* 
 * nstats.c   get data on node performance and msg stats
 *	data may be piped in and through
 * nstats tracefile
 *  ...| nstats
 * ...| nstats -outfile | . . .
 */

#include <stdio.h>
#include <ctype.h>

#define MAXTASKS 1000
#define RUN 1

struct NDATA {
	long start;
	long end;
	long idle;
	long tmptime;
	long busy;
	long state;
	long sndcnt;
	long rcvcnt;
	long rcvwcnt;
} node[MAXTASKS];

float util;
float totalu, totaluh;
long totalsnd, totalrcv;

#define MPHOST -32768
#define VHOST 32768
long ndflag;
long atol();

#define MAXLINE 1000
char linestr[MAXLINE];	/* hold a line read from the trace file. */

struct Tdata {
	long type;
	long Qd;
	long tid;
	long clock;
} t;

char *key_words[] = {
	"strace","tfork","texit","send","recv","recvw",
	"tid","waking","cnt","clock","blocking","running",
	"etrace","node","mpsim","to","lth",
	0};

#define STRACE 0
#define TFORK 1
#define TEXIT 2
#define SEND 3
#define RECV 4
#define RECVW 5
#define TID 6
#define WAKING 7
#define CNT 8
#define CLOCK 9
#define QD 10
#define RUNNING 11
#define ETRACE 12
#define NODE 13
#define MPSIM 14
#define TO 15
#define LTH 16

char *getword(), *nxtword();
long maxtasks;
long piping;
FILE *out;

#define LTHS 20
long lthcnt[LTHS],hopcnt[LTHS];
int lthdn[]={8,16,32,64,128,256,512,1024,2048,4096,8192,16000,0};
int length,to,from;
float lthvol[LTHS],totvol,totcnt,hopvol[LTHS];

main(argc, argv)
int argc;
char *argv[];
{
	FILE *tfptr;	/* Stream pointer for the trace file. */
	long i;
	long duration;
	float totalb;   /* total busy */
	long endtime, starttime;

	endtime = 0; starttime = 1<<30;
	totalb =0;
	out = stdout;
	if(argc != 2) tfptr = stdin;
	  else if (*argv[1] == '-') {
		out = fopen(argv[1] + 1,"a");
		piping = 1;
		tfptr = stdin;
	  }
	  else if((tfptr = fopen(argv[1], "r")) == NULL) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(1);
	  }
	while(fgets(linestr, MAXLINE, tfptr) != NULL) {
		if (piping) printf("%s",linestr);
		parse(linestr);
		switch(t.type){
		  case TEXIT:
			node[t.tid].end = t.clock;
			node[t.tid].busy += t.clock - node[t.tid].tmptime;
			break;
		  case SEND:
			node[t.tid].sndcnt++;
			totcnt++;
			totvol += length;
			for(i=0;i< LTHS;i++) if(length < lthdn[i]){
				lthcnt[i]++;
				lthvol[i] += length;
				break;
			}
			i= distance(to,from);
			hopcnt[i]++;
			hopvol[i] += length;
			break;
		  case RECV:
		  case RECVW:
			node[t.tid].rcvcnt++;
			break;
		  default:
			break;
		}
	}
	fprintf(out,"node	  start	       end	duration    busy	utiliz	  sends	  recvs\n");
	for(i=0;i<maxtasks+1;i++){
	  if (node[i].end ==0) {
		/* hmmm, things have ended twas it busy or idle ????*/
		node[i].end = t.clock; /*use last value */
		if (node[i].state == RUN)
		  node[i].busy += t.clock - node[i].tmptime; /* was busy */
	}
	if (node[i].end > endtime) endtime=node[i].end;
	if (node[i].start < starttime) starttime = node[i].start;
	duration = node[i].end-node[i].start;
	if (duration==0) duration =1;  /*clock wasn't running*/
	util = 100 * ((float)node[i].busy)/duration;
	if (i==0)fprintf(out,"HOST");
	  else fprintf(out,"%4ld",i-1);
	  fprintf(out," %10ld %10ld %10ld %10ld	%3.0f%%	%7ld	%7ld\n",
	   node[i].start,node[i].end,duration,
	   node[i].busy,util,node[i].sndcnt,node[i].rcvcnt);
	if (i != 0) totalu += util;
	totaluh += util;
	totalsnd += node[i].sndcnt;
	totalrcv += node[i].rcvcnt;
	totalb += node[i].busy;
	}
	fprintf(out,
	 "\nNodal utilization %3.0f%%  Nodal+host utilization %3.0f%% sends %ld  recvs %ld\n",
	 totalu/maxtasks,totaluh/(maxtasks+1),totalsnd,totalrcv);
	fprintf(out,"Gross utilization %3.0f%%\n",
	 100.*totalb/((endtime-starttime)*(maxtasks+1)));

	printf("\nTotal messages %7.0f   %7.0f bytes\n",totcnt,totvol);
	printf("lth    count		  bytes\n");
	for (i=0;lthdn[i] >0 ;i++)
	  printf("%5d %7ld  %3.0f%%    %7.0f  %3.0f%%\n",
	   lthdn[i],lthcnt[i],100.*lthcnt[i]/totcnt,lthvol[i],
	   100.*lthvol[i]/totvol);
	printf("\nhops    count		    bytes\n");
	for(i=0;i<8;i++)
		printf("%3ld %7ld %3.0f%%    %7.0f  %3.0f%%\n",
		 i-1,hopcnt[i],100.*hopcnt[i]/totcnt,
		 hopvol[i],100.*hopvol[i]/totvol);
}

distance(from,to)
int from,to;
{
	/* calculate hops */
	int i,cnt;

	if (from == -1 || to == -1) return(0);  /*host*/
	i = from^to;
	cnt=0;
	while(i){ if (i & 1) cnt++; i = i>>1;}
	return(cnt+1);
}

parse(l)
char *l;
{
	/* parse a line l */
	char *p, *w;
	long i;
	long tmp;

	t.type = -1;
	t.Qd = 0;
	p = getword(l);  /* get first word */
	while(*p){
		for(i=0;(w = key_words[i]) != NULL;i++) if (!strcmp(w,p))
		 switch(i){
		   case STRACE:
		   case TEXIT:
		   case TFORK:
		   case RECV:
		   case RECVW:
		   case CNT:
		   case SEND:
			t.type = i;
			break;
		   case MPSIM:
			ndflag=1;
			return;
		   case QD:
			node[t.tid].busy += t.clock - node[t.tid].tmptime;
			node[t.tid].tmptime = t.clock;
			node[t.tid].state = !RUN;
			break;
		   case NODE:
			ndflag=1;
		   case TID:
			from = t.tid = atol(nxtword());
			if (ndflag) t.tid = (t.tid== MPHOST || t.tid == VHOST)
			  ? 0 : t.tid + 1 ;
			if (t.tid >maxtasks) maxtasks = t.tid;
			if (ndflag) from = (from == MPHOST || from == VHOST) ?
			  -1 : from;
			else from--;
			break;
		   case CLOCK:
			t.clock = atol(nxtword());
			break;
		   case WAKING:
			while(*(p=nxtword())) {
				tmp = atol(p);
			 if (ndflag) tmp = (tmp== MPHOST || tmp == VHOST) ? 0 : tmp + 1 ;
				if (node[tmp].start ==0)
				 node[tmp].start=t.clock;
				else
				 node[tmp].idle += t.clock - node[tmp].tmptime;
				node[tmp].tmptime = t.clock;
				node[tmp].state = RUN;
			}
			break;
		   case RUNNING:
			while(*(p=nxtword())) {
				tmp = atol(p);
			 if (ndflag) tmp = (tmp== MPHOST || tmp == VHOST) ? 0 : tmp + 1 ;
				node[tmp].start= node[tmp].tmptime = t.clock;
				node[tmp].state = RUN;
			}
			break;
		  case TO:
			to = atoi(nxtword());
			if (to < 0) { to=from; break;} /*global send */
			if (ndflag)to = (to == MPHOST || to == VHOST) ? -1 : to;
			else to--;
			break;
		  case LTH:
			length = atoi(nxtword());
			break;
	 	  default:
			break;
		}
		p = nxtword();
	}
}


static char *nxtwdp, word_str[100];  /*locals for getword and nxtword*/

char *getword(s)
char *s;
{
	char *p;

	while( *s && isspace(*s))s++;
	p=word_str;
	while(*s && !isspace(*s)) *p++ = *s++;
	*p=0;
	nxtwdp=s;
	return(word_str);
}

char *nxtword()
{
	char *p;

	while(*nxtwdp && isspace(*nxtwdp)) nxtwdp++;
	p=word_str;
	while(*nxtwdp && !isspace(*nxtwdp)) *p++ = *nxtwdp++;
	*p=0;
	return(word_str);
}
SHAR_EOF
if test -f 'ringhv1.f'
then
	echo shar: over-writing existing file "'ringhv1.f'"
fi
cat << \SHAR_EOF > 'ringhv1.f'
c ringh.f    host pgm to drive rings -- pass np, revs, and lth
	implicit integer (a-z)
	integer copen,d
	integer cmds(3)
	integer*4 result

	d = copen(1)
10	write(6,55)
55	format(' number of nodes ',$)
	read(5,56)cmds(1)
56	format(i5)
	write(6,57)
57	format(' revolutions ',$)
	read(5,56)cmds(2)
	write(6,58)
58	format(' length ',$)
	read(5,56)cmds(3)
	call sendmsg(d,1,cmds,12,0,1)
	itype = 1
	call recvmsg(d,itype,result,4,lth,node,pid)
	write(6,99)result,result/cmds(2), result/cmds(2)/cmds(1)
99	format(' time ',i7,' ms  time/rev. ',i7,' ms  avrg nn ',i7)
	go to 10
	end
SHAR_EOF
if test -f 'iphv2.c'
then
	echo shar: over-writing existing file "'iphv2.c'"
fi
cat << \SHAR_EOF > 'iphv2.c'
/* iph.c  vector matrix(transpose) inner product using messages */

#include <stdio.h>

#define PID 0
#define DIM 4

int vector[DIM] = {2,3,1,4};
int matrix[DIM][DIM] = { 1,2,3,4,
			 2,3,1,0,
			 3,3,1,2,
			 4,3,2,1
			 };
int result[DIM];

main()
{
	/* main task */
	int d;
	int i;
	int val;
	int type,lth,node,pid;

	setpid(PID);
	for (i=0;i<DIM;i++){	   /* start and send data to each node */
		csend(1,matrix[i],sizeof(matrix[i]),i,PID);
		csend(2,vector,sizeof(vector),i,PID);
	}
	i=DIM;
	while(i--){   /* wait for results */
		crecv(-1,&val,sizeof(int));
		node = infonode();
		result[node] = val;
	}

	for(i=0;i<DIM;i++) printf(" %d",result[i]);
	printf(" (should be 27 14 24 23)\n");
}

SHAR_EOF
if test -f 'ipnfv2.f'
then
	echo shar: over-writing existing file "'ipnfv2.f'"
fi
cat << \SHAR_EOF > 'ipnfv2.f'
	implicit integer (a-z)
c multiply two vectors
	integer v1(4), v2(4)

	call crecv(1,v1,20)
	call crecv(2,v2,20)
	lth = infocount()
	node = infonode()
	pid = infopid()
	sum = 0
	do 10 i=1,4
10		sum = sum + v1(i) * v2(i)
	call csend(3,sum,4,node,pid)
	end
SHAR_EOF
if test -f 'ringnv1.f'
then
	echo shar: over-writing existing file "'ringnv1.f'"
fi
cat << \SHAR_EOF > 'ringnv1.f'
c ringn.f   fortran ring
c   host passes node 0  number of nodes in ring and lth and revolutions
	program ringn
	integer*4 clock,start,tim
	integer bsize
	parameter (bsize=3000)
	integer*4 msg(bsize)
	integer cmds(3)
	integer copen,ci
	integer cnt, next,me, mynode, gray,invgray
	integer node,pid,host

	ci = copen(1)
	me = mynode()
	cnt = 0
10	if (me .eq. 0 .and. cnt .eq. 0) then
		call recvw(ci,1,cmds,12,l,host,pid)
		np = cmds(1)
		msg(1) = np
		cnt = cmds(2)
		next = gray(mod(invgray(me)+1,np))
		start = clock()
		call sendw(ci,1,msg,cmds(3),next,1)
	endif
	call recvw(ci,1,msg,4*bsize,lth,node,pid)
	np = msg(1)
	next = gray(mod(invgray(me)+1,np))
	cnt = cnt -1
	if (me .eq. 0 .and. cnt .eq. 0) then
		tim = clock() - start
		call sendw(ci,1,tim,4,host,1)
	else
		call sendw(ci,1,msg,lth,next,1)
	endif
	go to 10
	end
c gray and invgray  -- uses shift and xor

	integer function gray(mm)
	integer mm,rshift,xor

	gray = xor(rshift(mm,1),mm)
	end

	integer function invgray(mm)
	integer mm,m,xor,rshift

	m=mm
	invgray = 0
10	if (m .eq. 0) return
	invgray = xor(invgray,m)
	m = rshift(m,1)
	go to 10
	end

        integer function lshift( mm, nn )
        integer mm,nn,i,m
c
c   left shift mm by nn places
c
	m=mm
	do 10 i=1,nn
	   m = m*2
  10    continue
	lshift = m
	end

	integer function rshift( mm, nn )
	integer mm,nn,i,m
c   
c   right shift mm by nn places
c
	m=mm
	do 10 i=1,nn
	   m = m/2
  10    continue
	rshift = m
	end
      integer function xor(mm,nn)
      integer mm,nn
c
c     exclusive or
c
      integer b,x,m,n
      m = mm
      n = nn
      x = 0
      b = 1
   10 if (m .eq. n ) go to 20
         if (mod(m,2) .ne. mod(n,2)) x = x + b
         b = 2*b
         m = m/2
         n = n/2
      go to 10
   20 xor = x
      end
SHAR_EOF
if test -f 'sbld'
then
	echo shar: over-writing existing file "'sbld'"
fi
cat << \SHAR_EOF > 'sbld'
cc -o $1  $1.c src/simlib.a -lm
SHAR_EOF
chmod +x 'sbld'
if test -f 'sfbld'
then
	echo shar: over-writing existing file "'sfbld'"
fi
cat << \SHAR_EOF > 'sfbld'
f77 -o $1  $1.f src/simlib.a
SHAR_EOF
chmod +x 'sfbld'
if test -f 'iphf.f'
then
	echo shar: over-writing existing file "'iphf.f'"
fi
cat << \SHAR_EOF > 'iphf.f'
c iphf.f  host
	implicit integer (a-z)
c do simple inner product with snd rcv
	integer matrix(4,4),vector(4)
	integer result(4)
	data matrix /1,2,3,4,2,3,1,0,3,3,1,2,4,3,2,1/
	data vector /2,3,1,4/

	d = copen(15)
	do 10 i=1,4
		call sendmsg(d,1,matrix(1,i),16,i-1,15)
		call sendmsg(d,2,vector,16,i-1,15)
10	continue
	do 20 i=1,4
		call recvmsg(d,itype,val,4,lth,node,pid)
		result(node+1) = val
20	continue
	write(*,*)result
	end
SHAR_EOF
if test -f 'ipnf.f'
then
	echo shar: over-writing existing file "'ipnf.f'"
fi
cat << \SHAR_EOF > 'ipnf.f'
	implicit integer (a-z)
c multiply two vectors
	integer v1(5), v2(5)

	d = copen(15)
	call recvw(d,1,v1,20,lth,node,pid)
	call recvw(d,2,v2,20,lth,node,pid)
	sum = 0
	do 10 i=1,5
10		sum = sum + v1(i) * v2(i)
	call sendw(d,3,sum,4,node,pid)
	end
SHAR_EOF
if test -f 'ringh.f'
then
	echo shar: over-writing existing file "'ringh.f'"
fi
cat << \SHAR_EOF > 'ringh.f'
c ringh.f    host pgm to drive rings -- pass np, revs, and lth
	implicit integer (a-z)
	integer cmds(3)
	integer*4 result

	call setpid(0)
10	write(6,55)
55	format('number of nodes ')
	read(5,56)cmds(1)
56	format(i5)
	write(6,57)
57	format(' revolutions ',$)
	read(5,56)cmds(2)
	write(6,58)
58	format(' length ',$)
	read(5,56)cmds(3)
	call csend(1,cmds,12,0,0)
	call crecv(-1,result,4)
	write(6,99)result,result/cmds(2), result/cmds(2)/cmds(1)
99	format(' time ',i7,' ms  time/rev. ',i7,' ms  avrg nn ',i7)
	go to 10
	end
SHAR_EOF
if test -f 'ringn.f'
then
	echo shar: over-writing existing file "'ringn.f'"
fi
cat << \SHAR_EOF > 'ringn.f'
c ringn.f   fortran ring
c   host passes node 0  number of nodes in ring and lth and revolutions
	program ringn
	integer*4 mclock,start,tim
	integer bsize
	parameter (bsize=3000)
	integer*4 msg(bsize)
	integer cmds(3)
	integer cnt, next,me, mynode, gray,invgray

	me = mynode()
	np = numnodes()
	next = gray(mod(invgray(me)+1,np))
	cnt = 0
10	if (me .eq. 0 .and. cnt .eq. 0) then
		call crecv(1,cmds,12)
		np = cmds(1)
		msg(1) = np
		cnt = cmds(2)
		start = mclock()
		call csend(1,msg,cmds(3),next,0)
	endif
	call crecv(1,msg,4*bsize)
	lth = infocount()
	cnt = cnt -1
	if (me .eq. 0 .and. cnt .eq. 0) then
		tim = mclock() - start
		call csend(1,tim,4,myhost(),0)
	else
		call csend(1,msg,lth,next,0)
	endif
	go to 10
	end
c gray and invgray  -- uses shift and xor

	integer function gray(mm)
	integer mm,rshift,xor

	gray = xor(rshift(mm,1),mm)
	end

	integer function invgray(mm)
	integer mm,m,xor,rshift

	m=mm
	invgray = 0
10	if (m .eq. 0) return
	invgray = xor(invgray,m)
	m = rshift(m,1)
	go to 10
	end

        integer function lshift( mm, nn )
        integer mm,nn,i,m
c
c   left shift mm by nn places
c
	m=mm
	do 10 i=1,nn
	   m = m*2
  10    continue
	lshift = m
	end

	integer function rshift( mm, nn )
	integer mm,nn,i,m
c   
c   right shift mm by nn places
c
	m=mm
	do 10 i=1,nn
	   m = m/2
  10    continue
	rshift = m
	end
      integer function xor(mm,nn)
      integer mm,nn
c
c     exclusive or
c
      integer b,x,m,n
      m = mm
      n = nn
      x = 0
      b = 1
   10 if (m .eq. n ) go to 20
         if (mod(m,2) .ne. mod(n,2)) x = x + b
         b = 2*b
         m = m/2
         n = n/2
      go to 10
   20 xor = x
      end
SHAR_EOF
if test -f 'ipnv2.c'
then
	echo shar: over-writing existing file "'ipnv2.c'"
fi
cat << \SHAR_EOF > 'ipnv2.c'
#define DIM 4
main()
{
	/* do inner product of two vectors */
	int v1[DIM], v2[DIM];
	int i, sum;
	int d;
	int me;
	int node,pid,lth;

	me=mynode();
	crecv(1,v1,sizeof v1);
	crecv(2,v2,sizeof v2);
	lth = infocount();
	node = infonode();
	pid = infopid();
	sum=0;
	for(i=0;i< (lth/ sizeof(int));i++) sum += v1[i] * v2[i];
	csend(3,&sum,sizeof(int),node,pid);
}
SHAR_EOF
if test -f 'chol.h'
then
	echo shar: over-writing existing file "'chol.h'"
fi
cat << \SHAR_EOF > 'chol.h'

#include <stdio.h>
#include <math.h>

#ifndef HOST
#define  HOST myhost()
#endif
#define  PID      0
#define  FORWARD  1
#define  BACKWARD 0
#define  NATURAL  0
#define  GRAY     1
#define  I    32000
#define  M    32001
#define  B    32002

typedef float REAL ;
typedef int SHORT ;
typedef int LONG ;
typedef int CHNL ;

char *malloc() ;
SHAR_EOF
if test -f 'README'
then
	echo shar: over-writing existing file "'README'"
fi
cat << \SHAR_EOF > 'README'
	mpsim   portable hypercube simulator
	Dunigan   ORNL   dunigan@msr.epm.ornl.gov

MPSIM
	Portable (to most UNIX systems) message-passing simulator
	that supports both C and FORTRAN.  Uses forks and pipes
	to support up to 8 to 16 processes on UNIX BSD 4.x, SYS V 3.x,
	DYNIX, Encore, Ultrix, Sun, XENIX, Tek UNIX, 3B2s.  Simulates
	Intel iPSC/1 and iPSC/2 hypercubes and produces trace file of message
	events.  Trace-file analyzers produces tabular or graphical
	summaries.  (Encore and Sequent versions will utilize
	multiple processors.)


top directory contains sample programs and trace file postprocessors
  (nstats.c, ccplot.c, trace1.c)


src/
	the beef, do a make here to create mpsim and simlib.a
	mpsim.man and makefiles; also sample build scripts (sbld, sfbld)

---------------------------------------------------------
Information on other ORNL parallel processor simulators

  ORNL  parallel processor simulators
      dunigan     last change: 8/3/88

directories:  (source distribution size)

  mpsim/  portable simulator, sample programs   (122KB)
	src/   mpsim source

  smpsim/  sequent version (NS32x32) of mpsim using shared memory  (147KB)

  sysVsmpsim/  beta version of mpsim  using System V shared memory   (62KB)


  ppsim/  interpretive simulator   (2.8MB)
    
	doc/	 man entries and such
	postp/	 postprocessor of trace files
	samples/ sample codes
	lll/	 original LLNL distribution
	seqdn/	 version of simulators for Sequent (32032 CPUs only)
	   kernel/ library source, aspp source
	   local/  sample bld and makefile for applications
	sundn/	 version of simulators for Sun workstations (Sun 3/68881 only)
	   kernel/ library source, aspp source
	   local/  sample bld and makefile for applications
	vaxdn/	 version of simulators for VAX UNIX 4.x/Ultrix
	   kernel/ library source, aspp source
	   local/  sample bld and makefile for applications

Simulator summary:

We have several hypercube simulators and a simulator for shared-memory
parallel processors; all support C and Fortran.  A brief summary:

Name    Runs on              Granularity     Performance    Debugging 
						Info           Help
ppsim   Vax, Sequent, Sun      fine             yes            yes
mpsim   Any Unix with pipes    coarse           no             yes
smpsim  Sequent                coarse           no             yes


PPSIM
	Interpretive parallel processor simulator, based on "Multitasker"
	by Eugene Brooks, modified to support message-passing parallel
	processors.  Provides instruction level tracing, program
	tunable message-passing parameters (startup time, node-to-node
	rates, host-to-node rates, packet size).  Trace file post-processing
	programs provide concurrency plots and tabular summary
	of message passing behavior.  Supports C and f77 on UNIX 4.x
	on VAX, Sun 3 (68020/68881), or Sequent (32032) processors.
	Message-passing software emulates Intel iPSC/1 hypercube subroutines.
	Shared-memory simulation supports, locks, events, barriers,
	semaphores, enqueue, and task creation.  We have used this package
	in teaching graduate course in parallel programming.

SMPSIM
	MPSIM converted to use shared-memory of Sequent for message
	passing.

The above table is a bit of an over simplification, of course.  ppsim
gives an interpretive simulation at the level of individual
instructions and has tunable performance parameters.  It runs as a
single process under Unix and gives meaningful, graphical performance
output and is also useful for debugging.  mpsim uses multiple Unix
processes communicating via pipes.  It is helpful for debugging but
doesn't give very meaningful performance info.  Its advantage over
ppsim is faster execution (in terms of wall clock time).  smpsim is a
specialization of mpsim to the Sequent in which communication by pipes
is replaced by communication via shared memory, which is much faster
still.  In fact smpsim simulates an iPSC in faster than real time for
the same number of (real) processors.

Documentation:

man entries are included in the various distribution directories.
Additional technical reports:

 "A message-passing multiprocessor simulator",Dunigan, ORNL/TM-9966
 "A portable hypercube simulator", Dunigan, ORNL/TM-10410
 "Denelcor HEP multiprocessor simulator, Dunigan, ORNL/TM-9971
 "A multitasking kernel for the C and Fortran programming languages",
    Brooks, LLNL UCID-20167

Work-in-progress:

  dcube-- hypercube simulator using TCP/IP-connected workstations
	  as the nodes
	  "Hypercube Simulation on a Local Area Network", Dunigan,
	  ORNL/TM-10685

dunigan@msr.epm.ornl.gov
SHAR_EOF
if test -f 'recthv2.c'
then
	echo shar: over-writing existing file "'recthv2.c'"
fi
cat << \SHAR_EOF > 'recthv2.c'
/* recth.c   rectangel rule   host  */
#define PID 0
#define TYPE1 1
#define TYPE2 2
struct Result {
	double pi;
	long ticks;
} result;
long n;
main()
{
	int p;
	int c,type,tmp, i;

	setpid(PID);
	p = numnodes();
	while(1){
		printf("rectangles "); scanf("%ld",&n);
		for(i=0;i<p;i++)csend(TYPE1,&n,sizeof(n),i,PID);
		crecv(-1,&result,sizeof(result));
		printf(" pi %f in %ld\n",result.pi,result.ticks);
	}
}

SHAR_EOF
if test -f 'rectnv2.c'
then
	echo shar: over-writing existing file "'rectnv2.c'"
fi
cat << \SHAR_EOF > 'rectnv2.c'
/* rectn.c   rectangel rule   node  */
#define PID 0
#define TYPE1 1
#define TYPE2 2
#define TYPE3 3
#define ROOT 0

#define F(x) 4.0/(1.0 + x * x )

struct Result {
	double pi;
	long ticks;
} result;
long n;
main()
{
	int p,host,pid;
	int me,c,type,tmp;
	long i;
	double sum,x;
	long mclock(), s;

	p = numnodes();
	me = mynode();
	while(1){
		crecv(TYPE1,&n,sizeof(n));
		s = mclock();
		sum = 0.0;
		for(i=me;i<n; i += p) {
			x = (i + 0.5)/(double)n;
			sum += F(x);
		}
		if (me == ROOT){
		  for (i=1;i<p;i++){
		   crecv(TYPE3,&x,sizeof(x));
		   sum += x;
		  }
		  result.pi = sum/n;
		  result.ticks = mclock()-s;
		  csend(TYPE2,&result,sizeof(result),myhost(),PID);
		} else csend(TYPE3,&sum,sizeof(sum),ROOT,PID);
	}
}
SHAR_EOF
if test -f 'ccplot.c'
then
	echo shar: over-writing existing file "'ccplot.c'"
fi
cat << \SHAR_EOF > 'ccplot.c'
/*
 *  ccplot.c			thd   3/22/88
 * generate plot data for graph command
 *  should really only be used with data from ppsim
 */

#include <stdio.h>

#define MPHOST -32768
#define VHOST 32768
long ndflag;

	int concurrency;
	float avetasks;
	int maxtasks;
	int maxtid;
	int maxclock;
	int lasty, lastx;
char *strfnd();

main(argc, argv)
int argc;
char *argv[];
{
	float f, df;
	float util;
	int start,delta;
	int maxn;
	int count;
	int tid;
	int clock, oldclock;
	char *p;
	FILE *tfptr;	/* Stream pointer for the trace file. */
#define MAXLINE 1000
	char linestr[MAXLINE];	/* Buffer to hold a line read from the trace file. */
#define STRLEN	100
	char segstr[STRLEN];
	char avestr[STRLEN];
	int i;

	if(argc != 2) tfptr = stdin;
	  else 	if((tfptr = fopen(argv[1], "r")) == NULL) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(1);
	  }
	/* We read the first line from the tracefile which must be a header.
		*/
	while(fgets(linestr, MAXLINE, tfptr) != NULL) {
		if(strncmp(linestr, "strace", strlen("strace")) == 0) {
			/* Begin a new plot. */
			lasty=1;
			lastx=0;
			start= -1;
			oldclock = -1;
		}
		else if(strncmp(linestr, "mpsim", strlen("mpsim")) == 0)
			ndflag=1;
		else if(strncmp(linestr, "cnt", strlen("cnt")) == 0) {
			if(sscanf(linestr, "cnt	%d	clock	%d", &count, &clock) != 2) {
				fprintf(stderr, "Failed to read count and clock from linestr:%s\n", linestr);
				exit(1);
			}
			if (clock == -1){
			  fprintf(stderr,"?ccplot: trace clock not running build in aspp mode\n");
			  exit(1);
			}
			if (count>maxtasks) maxtasks =count;
			if (clock > maxclock) maxclock = clock;
			if (start == -1) start=clock;
			if (clock<oldclock)
			  fprintf(stderr,"?ccplot: trace not chronological %d\n"
			   ,clock);
			oldclock = clock;
			if (lastx) avetasks += lasty * (clock -lastx);
			printf("%d %d\n%d %d\n",clock,lasty,clock,count);
			lasty=count;
			lastx=clock;
		}
		else if(strncmp(linestr, "etrace", strlen("etrace")) == 0) {
			if(sscanf(linestr, "etrace tid %d clock %d", &tid, &clock) != 2) {
				fprintf(stderr, "failed to read tid and clock from linestr:%s\n", linestr);
				exit(1);
			}
			if (ndflag) tid = (tid== MPHOST || tid == VHOST)
			  ? 0 : tid + 1 ;
			break;
		}
		else if ((p=strfnd(linestr,"tid "))!=NULL){
			tid=atoi(p+4);
			if (ndflag) tid = (tid== MPHOST || tid == VHOST)
			  ? 0 : tid + 1 ;
			if (tid>maxtid)maxtid=tid;
		}
		else if ((p=strfnd(linestr,"node "))!=NULL){
			tid=atoi(p+5);
			if (ndflag) tid = (tid== MPHOST || tid == VHOST)
			  ? 0 : tid + 1 ;
			if (tid>maxtid)maxtid=tid;
		}
	}   /* while more data to read */
	count=0;
	printf("%d %d\n%d %d \" \"\n",clock,lasty,clock,count);
	maxn = maxtid > maxtasks ? maxtid : maxtasks;
	df = maxn/10.;  /* locate labels */
	f = maxn+ 2*df;
	delta=maxclock-start;
	printf("%d %f \"Concurrency plot  x-time  y-concurrent processes\"\n",
	 start + delta/4,f);
	f -= df;
	util = 100. * ((float)avetasks) /((maxtid+1) * delta);
	printf("%d %f \"Nodes %d Max concurrency %d  Gross Utilization %3.0f%% \"\n",
	  start + delta/4,f,maxtid,maxtasks,util);
}

char *strfnd(t,s)
	char *t, *s;
{
	/* see if string s is in t */
	while ((strlen(t) >= strlen(s)) && *t) 
	  if (strncmp(t++,s,strlen(s))==0)return(--t);
	return(NULL);
}

SHAR_EOF
if test -f 'trace1.c'
then
	echo shar: over-writing existing file "'trace1.c'"
fi
cat << \SHAR_EOF > 'trace1.c'
/* trace1.c			thd    3/22/88
 * generate plot file that shows which nodes are running over time
 * really should only be used with ppsim
 */

#include <stdio.h>
#include <ctype.h>

#define MPHOST -32768
#define VHOST 32768
long ndflag;

#define MAXTASKS 1000
int begin[MAXTASKS];  /* begin time */

#define MAXLINE 1000
char linestr[MAXLINE];	/* hold a line read from the trace file. */

struct Tdata {
	int type;
	int Qd;
	int tid;
	int clock;
} t;

char *key_words[] = {
	"strace","tfork","texit","send","recv","recvw",
	"tid","waking","cnt","clock","blocking","running",
	"mpsim","node",
	0};

#define STRACE 0
#define TFORK 1
#define TEXIT 2
#define SEND 3
#define RECV 4
#define RECVW 5
#define TID 6
#define WAKING 7
#define CNT 8
#define CLOCK 9
#define QD 10
#define RUNNING 11
#define MPSIM 12
#define NODE 13

char *getword(), *nxtword();
int maxtasks;
int minclock = -1;

main(argc, argv)
int argc;
char *argv[];
{
	FILE *tfptr;	/* Stream pointer for the trace file. */
	int i;

	if(argc != 2) tfptr = stdin;
	  else if((tfptr = fopen(argv[1], "r")) == NULL) {
		fprintf(stderr, "Can't open %s for reading\n", argv[1]);
		exit(1);
	  }
	while(fgets(linestr, MAXLINE, tfptr) != NULL) {
		parse(linestr);
		switch(t.type){
		  case STRACE:
			if (minclock == -1) minclock = t.clock;
			break;
		  case TEXIT:
			plot();
			break;
		}
		if (t.Qd) plot();
	}
	printf("%d %d \" \"\n %d 0\n",minclock,maxtasks+1,minclock);
}

parse(l)
char *l;
{
	/* parse a line l */
	char *p, *w;
	int i;
	long nd;

	t.type = -1;
	t.Qd = 0;
	p = getword(l);  /* get first word */
	while(*p){
		for(i=0;(w = key_words[i]) != NULL;i++) if (!strcmp(w,p))
		 switch(i){
		   case STRACE:
		   case TEXIT:
		   case TFORK:
		   case RECV:
		   case RECVW:
		   case SEND:
			t.type = i;
			break;
		   case QD:
			t.Qd++;
			break;
		   case TID:
		   case NODE:
			t.tid = atoi(nxtword());
			if (ndflag) t.tid = (t.tid== MPHOST || t.tid == VHOST)
			  ? 0 : t.tid + 1 ; /*adjust to ppsim style */
			break;
		   case CLOCK:
			t.clock = atoi(nxtword());
			break;
		   case MPSIM:
			ndflag=1;
			return;
		   case WAKING:
		   case RUNNING:
			while(*(p=nxtword())){
				nd = atoi(p);
				if (ndflag) nd = (nd== MPHOST || nd == VHOST)
				  ? 0 : nd + 1 ;
				begin[nd] = t.clock;
			}
			break;
		}
		p = nxtword();
	}
}
plot()
{
	float task;
	/* plot task duration */

	task = t.tid ;
	if (!t.tid) task += 0.25;
	if (t.tid > maxtasks) maxtasks = t.tid;
	printf("%d %f\n%d %f\n%d %f \" \"\n",begin[t.tid],task,t.clock,task,
	 t.clock,task + 0.25);
}


static char *nxtwdp, word_str[100];  /*locals for getword and nxtword*/

char *getword(s)
char *s;
{
	char *p;

	while( *s && isspace(*s))s++;
	p=word_str;
	while(*s && !isspace(*s)) *p++ = *s++;
	*p=0;
	nxtwdp=s;
	return(word_str);
}

char *nxtword()
{
	char *p;

	while(*nxtwdp && isspace(*nxtwdp)) nxtwdp++;
	p=word_str;
	while(*nxtwdp && !isspace(*nxtwdp)) *p++ = *nxtwdp++;
	*p=0;
	return(word_str);
}
SHAR_EOF
cd ..
#	End of shell archive
exit 0