#!/bin/sh # This is a shell archive, meaning: # 1. Remove everything above the #!/bin/sh line. # 2. Save the resulting text in a file. # 3. Execute the file with /bin/sh (not csh) to create the files: # mpsim # This archive created: Thu Feb 9 09:07:05 1989 # By: Tom Dunigan 576-2522 () # From: dunigan@msr.EPM.ORNL.GOV (Tom Dunigan 576-2522) export PATH; PATH=/bin:$PATH if test ! -d 'mpsim' then mkdir 'mpsim' fi cd 'mpsim' if test ! -d 'src' then mkdir 'src' fi cd 'src' if test -f 'CHANGES' then echo shar: over-writing existing file "'CHANGES'" fi cat << \SHAR_EOF > 'CHANGES' 8/22/86 make pause sigpause for 4.2ish machines so will work with SUN 8/1/87 fix DATLTH waste; add System V conditionals makefile.sysv userin.c simlib.c mpsim.c 10/7/87 v1.2, add mypid() and global send probemsg userin.c seqftn.f ftnsubs.c simlib.c drive.c mpsim.man 3/22/88 v 1.3 fix global send problem in drive.c 8/2/88 v2.0 iPSC/2 stuff, from new version of smpsim logfile no longer append SHAR_EOF if test -f 'README' then echo shar: over-writing existing file "'README'" fi cat << \SHAR_EOF > 'README' mpsim portable hypercube simulator Tom Dunigan, ORNL mpsim consists of a a simulator driver (mpsim) and a library (simlib.a) for building host and node programs to simulate Intel hypercube on Intel 310 xenix, SUN, pc AT xenix, Unix 4.2, and Sequents. Uses forks and pipes, creating subprocesses under unix for each node and the host (much like the Intel xsim simulator). Limited to d3 cube on xenix (or d4 if xenix is configured properly). 4.2 supports d4's. nstats and ccplot (from other ORNL simulation package) can be used to analyze trace file. You should be able to build your cube/host programs with no changes. Use x8000 as the "node id" for the host process. Sample script sbld is included to show how build simulator programs. Then just say mpsim and type ? to its prompt and it will show you how to "load" the host and node programs and start the simulation. To create mpsim and simlib.a just do a make with makefile for the appropriate machine. There is a version for Sequent that uses shared memory rather than pipes. Present documentation is only a manual entry -- mpsim.man and technical report "A Portable Hypercube Simulator", Dunigan, ORNL/TM-10410 SHAR_EOF if test -f 'makefile' then echo shar: over-writing existing file "'makefile'" fi cat << \SHAR_EOF > 'makefile' SIMSRC = mpsim.c drive.c userin.c SIMOBJ = mpsim.o drive.o userin.o LIBSRC = simlib.c ftnsubs.c LIBOBJ = simlib.o ftnsubs.o CFLAGS = -O -DBCOPY all: mpsim simlib mpsim: $(SIMOBJ) cc -o mpsim $(SIMOBJ) $(SIMOBJ): sim.h simlib.h simlib: $(LIBOBJ) ar cr simlib.a $(LIBOBJ) ranlib simlib.a $(LIBOBJ): simlib.h shar: shar CHANGES README makefile* sbld* sfbld* *.man *.h *.c *.f > tmp.shar SHAR_EOF if test -f 'makefile.seq' then echo shar: over-writing existing file "'makefile.seq'" fi cat << \SHAR_EOF > 'makefile.seq' # seq make -P8 SIMSRC = mpsim.c drive.c userin.c SIMOBJ = mpsim.o drive.o userin.o LIBSRC = simlib.c ftnsubs.c seqftn.f LIBOBJ = simlib.o ftnsubs.o seqftn.o CFLAGS = -O -DBCOPY FFLAGS = -e all: & mpsim simlib mpsim: & $(SIMOBJ) cc -o mpsim $(SIMOBJ) $(SIMOBJ): sim.h simlib.h simlib: & $(LIBOBJ) ar cr simlib.a $(LIBOBJ) ranlib simlib.a $(LIBOBJ): simlib.h SHAR_EOF if test -f 'makefile.sysv' then echo shar: over-writing existing file "'makefile.sysv'" fi cat << \SHAR_EOF > 'makefile.sysv' # SYS V, Release 2.0 for PC/AT. SIMSRC = mpsim.c drive.c userin.c SIMOBJ = mpsim.o drive.o userin.o LIBSRC = simlib.c LIBOBJ = simlib.o CFLAGS = -Ml -DSYSV all: mpsim simlib mpsim: $(SIMOBJ) cc -Ml -o mpsim $(SIMOBJ) $(SIMOBJ): sim.h simlib.h simlib: $(LIBOBJ) ar cr simlib.a $(LIBOBJ) $(LIBOBJ): simlib.h SHAR_EOF if test -f 'makefile.sysv3' then echo shar: over-writing existing file "'makefile.sysv3'" fi cat << \SHAR_EOF > 'makefile.sysv3' # SYS V, Release 2.0 for PC/AT. SIMSRC = mpsim.c drive.c userin.c SIMOBJ = mpsim.o drive.o userin.o LIBSRC = simlib.c ftnsubs.c LIBOBJ = simlib.o ftnsubs.o CFLAGS = -DSYSV all: mpsim simlib mpsim: $(SIMOBJ) cc -o mpsim $(SIMOBJ) $(SIMOBJ): sim.h simlib.h simlib: $(LIBOBJ) ar cr simlib.a $(LIBOBJ) $(LIBOBJ): simlib.h SHAR_EOF if test -f 'makefile.vax' then echo shar: over-writing existing file "'makefile.vax'" fi cat << \SHAR_EOF > 'makefile.vax' SIMSRC = mpsim.c drive.c userin.c SIMOBJ = mpsim.o drive.o userin.o LIBSRC = simlib.c ftnsubs.c LIBOBJ = simlib.o ftnsubs.o CFLAGS = -O -DBCOPY all: mpsim simlib mpsim: $(SIMOBJ) cc -o mpsim $(SIMOBJ) $(SIMOBJ): sim.h simlib.h simlib: $(LIBOBJ) ar cr simlib.a $(LIBOBJ) ranlib simlib.a $(LIBOBJ): simlib.h shar: shar README makefile* sbld* sfbld* *.man *.h *.c *.f > tmp.shar SHAR_EOF if test -f 'makefile.xenix' then echo shar: over-writing existing file "'makefile.xenix'" fi cat << \SHAR_EOF > 'makefile.xenix' SIMSRC = mpsim.c drive.c userin.c SIMOBJ = mpsim.o drive.o userin.o LIBSRC = simlib.c LIBOBJ = simlib.o CFLAGS = -Ml -DXENIX all: mpsim simlib mpsim: $(SIMOBJ) cc -Ml -o mpsim $(SIMOBJ) -lx $(SIMOBJ): sim.h simlib.h simlib: $(LIBOBJ) ar cr simlib.a $(LIBOBJ) ranlib simlib.a $(LIBOBJ): simlib.h SHAR_EOF if test -f 'sbld' then echo shar: over-writing existing file "'sbld'" fi cat << \SHAR_EOF > 'sbld' cc -o $1 $1.c src/simlib.a -lm SHAR_EOF chmod +x 'sbld' if test -f 'sbld.xenix' then echo shar: over-writing existing file "'sbld.xenix'" fi cat << \SHAR_EOF > 'sbld.xenix' cc -o $1 -Ml $1.c simlib.o -lm -lx SHAR_EOF chmod +x 'sbld.xenix' if test -f 'sfbld' then echo shar: over-writing existing file "'sfbld'" fi cat << \SHAR_EOF > 'sfbld' fortran -e -o $1 $1.f src/simlib.a SHAR_EOF chmod +x 'sfbld' if test -f 'sfbld.seq' then echo shar: over-writing existing file "'sfbld.seq'" fi cat << \SHAR_EOF > 'sfbld.seq' fortran -e -o $1 $1.f src/simlib.a SHAR_EOF chmod +x 'sfbld.seq' if test -f 'sfbld.vax' then echo shar: over-writing existing file "'sfbld.vax'" fi cat << \SHAR_EOF > 'sfbld.vax' f77 -o $1 $1.f src/simlib.a SHAR_EOF chmod +x 'sfbld.vax' if test -f 'sfbld.xenix' then echo shar: over-writing existing file "'sfbld.xenix'" fi cat << \SHAR_EOF > 'sfbld.xenix' echo Fortran compile and link with ornl ipsc simulator. ftn286 $1.f bnd286 -xc -ss '"stack(+1000H)"' -ep -oj $1 -- \ $1.obj \ /usr/local/simlib.lib \ /usr/intel/lib/osisl.obj \ /usr/intel/lib/cel287.lib \ /usr/intel/lib/f286r?.lib \ /usr/intel/lib/80287.lib \ /usr/intel/lib/xudil.lib \ /usr/intel/lib/xnxudi.lib \ /usr/ipsc/lib/sim/xsimlib.lib \ /usr/intel/lib/Llibc.lib \ /usr/intel/lib/Llibx.lib SHAR_EOF chmod +x 'sfbld.xenix' if test -f 'mpsim.man' then echo shar: over-writing existing file "'mpsim.man'" fi cat << \SHAR_EOF > 'mpsim.man' .TH mpsim L "2 August 1988 v2.0" .UC 4 .SH NAME mpsim - portable message-passing multiprocessor simulator .SH SYNOPSIS .I mpsim commands: .nf .sp 1 \f3c [filename]\f1 disable logging or log to \f2filename\f3 .br h host\f1 load host with file \f2host\f3 .br \f3l [-c dim] [-n nodeno] [-p pid] node\f1 .br load \f2nodeno\f1 node or all nodes of a cube of dimension \f2dim\f1 with file \f2node .br \f3q\f1 quit .br \f3s\f1 start simulation .br \f3t [on]\f1 enable or disable trace \f3u file\f1 use file for command input .fi .sp 2 The host and node programs should be linked with the library .I simlib.a which contains the following subroutines: .ft 3 .nf int copen(int pid); void cclose(); void send(int d, int type, char *msg, int msglth, int dstnode, int dstpid); void sendw(int d, int type, char *msg, int msglth, int dstnode, int dstpid); void sendmsg(int d, int type, char *msg, int msglth, int dstnode, int dstpid); void recv(int d, int type, char *msg, int maxlth, int *msglth, int *srcnode, int *srcpid); void recvw(int d, int type, char *msg, int maxlth, int *msglth, int *srcnode, int *srcpid); void recvmsg(int d, int *type, char *msg, int maxlth, int *msglth, int *srcnode, int *srcpid); int probe(int d, type); int probemsg(int pid); int status(int d); int mynode(); int mypid(); int cubedim(); int clock(); void syslog(int pid, char *msg); void flick(); void setpid(int pid); void crecv(int type, int *msg, int maxlth); int irecv(int type, int *msg, int maxlth); void csend(int type, int *msg, int msglth, int dstnode, int dstpid); int isend(int type, int *msg, int msglth, int dstnode, int dstpid); int myhost(); int mclock(); int infocount(); int infonode(); int infopid(); int infotype(); int msgdone(int id); int msgwait(int id); int iprobe(int type); .fi .ft 1 .SH DESCRIPTION A simulator driver task, .I mpsim, provides a set of commands for loading and controlling a set of hypercube application programs built with the simulator library .I simlib.a. The simulator can be used to execute programs developed for the Intel hypercubes (iPSC/1 or iPSC/2), and a trace file is provided to assist in debugging or performance analysis. The simulator uses the UNIX fork and pipe facilities and has been tested on a Intel 310 with XENIX, an IBM PC/AT with XENIX, an Intel 301 with System V 3.0, a DEC VAX with UNIX 4.2, an Encore, and a Sequent Balance 8000 with DYNIX. Limited testing has been done on System V. Since the simulation will spawn multiple processes, you may wish to use .I nice(1) to reduce system load. Note that simulation on the Sequent or Encore will utilize multiple processors. The simulation does not provide the detailed timings, large number of processors, or message-passing model of the simulator described in .I intel(l), but does permit porting of programs between the Intel cube and the simulator with no source code changes. In addition, there are no restrictions on the use of COMMON or global variables in C. The programmer compiles and links his host and node programs with .I simlib.a and then runs .I mpsim to load the application programs into the simulator cube and start the simulation. .PP The .I c commmand (cubelog) turns off logging with no arguments, or if a file name is provided, enables logging to the given file. The .I h command (host) specifies an executable file that will be the application host program. The .I l command (load -- you may substitute .I m for .I l) loads the simulator nodes with the program .I node. A specific node may be loaded with the .I -n option, and a specifici process id (\f2pid\f1) may be assigned with .I -p (The default pid is 0. The host program must use \f2setpid()\f1 for version 2 subroutines.) .I Dim specifies the dimension of the cube and must be in the range 0-4. (On some XENIX systems the maximum dimension may be 3.) The .I t command (trace) enables or disables tracing of simulator events \f2send\f1's and \f2recv\f1's. Once all application programs are "loaded" the .I s command (start) will start the simulation, you can exit with the .I q command (quit). You may place commands for \f2mpsim\f1 in a file and use the .I u command to instruct \f2mpsim\f1 to read the file for command input. If all processes exit then the simulator will exit, otherwise it will be necessary to use CTRL-C to terminate the simulation. A sample session might be .ft 2 .sp 1 cc -o host host.c simlib.a -lm cc -o node node.c simlib.a -lm mpsim t on l -c 3 node h host s .ft 1 .PP The functions provided in .I simlib.a mirror those described in the Intel .I iPSC User's Guide. .ft 1 The programmer must first establish one or more cube communication channel data structures with calls to .I copen. .I copen takes a value to be used as process identifier and returns a descriptor, of type int, that is used in subsequent message-passing functions. A process is addressed by its node number and process identifier. .I cclose frees the channel data structure. .PP .I send and .I sendw send the message pointed to by msg to the process at node dstnode with process id dstpid. The type and size of the message (msglth) in bytes are also provided. .I send returns immediately, but one cannot use the message area until .I status returns FREE (0), indicating that the kernel has sent the message. .I sendw does not return until the message has been sent. (This does not imply that the message has been received.) .I sendmsg behaves exactly like .I sendw but is intended as the host version for compatibilty with INTEL cube. If \f2dstnode\f1 is -1, then the message is sent to all other nodes in the cube. One may send to the other nodes of a dimension-d subcube containing the sending node by setting \f2dstnode\f1 to \f2d - cubedim() - 1\f1. .PP .I recv and .I recvw await the arrival of a message of the given type for the node and process id associated with int d. The functions provide addresses to store the message, the actual length of the message, and the node and process id of the sender. For .I recvw the process blocks until a message of the given type arrives. For .I recv the process may continue processing after issuing the .I recv, and when .I status returns a value of FREE (0), then a message of the given type has arrived. Upon receipt of the message, the simulator sets the srcnode and srcpid to those of the sender, sets the msglth to the length of the received message, and copies the message into msg. No more than maxlth bytes are copied. Messages are handled in a FIFO fashion. .I recvmsg behaves like .I recvw except it does not discriminate on message type, rather the type of the message is returned along with the message in accordance with the INTEL cube. .I sendmsg and .I recvmsg are intended (by INTEL) to be used only by the host processes, but the simulator permits node usage as well. .PP .I probe determines if a message of the given type is available for the node and process id associated with the given int. If a message is available, .I probe returns the length of the message; otherwise, a value of -1 is returned. One must issue a .I recv actually to fetch the message. Note, the channel data structure should not be in use by other message-passing functions. On the host, .I probemsg determines if a message for the given process id is available. If a message is available, a value of 1 is returned; otherwise, a value of 0 returned. If the process id is -1, then 1 is returned if a message is waiting on the host for any process id. .I status returns a value of BUSY (1) or FREE (0) indicating whether the given int data structure is in use or not. For .I send, BUSY indicates that the kernel has not yet sent the message. For .I recv, BUSY indicates that the desired message has not arrived. .PP .I mynode returns the node number of the process. The host has a node number of x8000 (32768). .I mypid presently returns 0. The dynamic loader functions are not supported by the simulator. .I cubedim returns the dimension of the cube. .I clock returns the present value of the time-of-day clock in milliseconds. .I syslog places the given message and pid in the trace file. .I flick relinquishes control from the given process to other runnable processes. .I flick is usually used in busy-wait conditions with .I status following a .I recv or with .I probe. .PP Note that .I mpsim uses four file descriptors (logical unit numbers) in managing the simulation through UNIX pipes. The file descriptors used (and not available to your application) are 10, 11, 12, and 13. .SH VERSION 2 SUBROUTINES The second generation of Intel hypercubes (iPSC/2) uses a slightly different set of message passing routines. The new routines do not require \f2copen\f1. Information returned by \f2recvw\f1 are now returned by the \f2info\f1 functions. .SH POST PROCESSORS If tracing has been enabled then a trace file is produced with simulator data that can be summarized by .I nstats or .I ccplot. .I nstats tracefile will produce a per-node summary of compute time and sends and receives. .I ccplot tracefile > plotfile will produce a plotfile that can be plotted with various plotting programs such as .I graph(1). .SH FORTRAN The message passing subroutines may also be called from .I f77 programs. .SH FILES The following files are provided; the actual location is site dependent. .br simlib.a simulator subroutines .br mpsim simulator driver .br sbld sample C build script .br sfbld sample f77 build script .SH SEE ALSO ppsim(l), smpsim(l) shared-memory version for Sequent, hep(l), intel(l), Intel's .I iPSC User's Guide and "A Portable Hypercube Simulator", Dunigan, ORNL/TM-10410. A version of the simulator (dcube) is available that uses a networked set of UNIX engines. .SH BUGS The delay in message passing is due to delays in UNIX pipes and does not reflect a hypercube structure. Messages are passed through the simulator driving task .I mpsim. The clock used is just the computer's time of day clock and has a resolution of only 20 milliseconds. Intel's notion of running multiple processes on a single hypercube node is not supported. The .I handler function and the Intel dynamic loader functions are not presently implemented. For the version 2 subroutines, the maximum message size is 256,000 bytes. The typesel field in crecv() supports only wildcard (-1) and specific type; the type mask is not currently supported. .SH AUTHOR T. H. Dunigan ORNL SHAR_EOF if test -f 'sim.h' then echo shar: over-writing existing file "'sim.h'" fi cat << \SHAR_EOF > 'sim.h' /* sim.h thd 8/2/88 * simulator definitions */ #define MAXNODES 16 #define MAXDIM 4 #define FLTH 40 #define LOGNM "SIMLOG" #define WAIT 0 struct NodeTable { struct MsgHdr *nt_recvq; /* pointer to messages waiting to be recvd */ /* must be first in list */ int nt_upid; /* unix pid */ int nt_mynode; /* FUTURE dynamic load node number */ int nt_mypid; /* pid from load or setpid */ char nt_filnam[FLTH]; /* load module file name */ int nt_state; /* waiting */ int nt_type, nt_pid; /* type and pid waiting for */ char *nt_bldptr; /* msg assembly/dis ptr */ /* in senders struct pointing to recvers q*/ } node[MAXNODES+1]; int running; int CubDim; int trace; int debug; char logfile[FLTH]; int logging; FILE *lf; long clock(), sttime; SHAR_EOF if test -f 'simlib.h' then echo shar: over-writing existing file "'simlib.h'" fi cat << \SHAR_EOF > 'simlib.h' /* simlib.h thd 8/2/88 */ /* we use 3 pipes * MSG r/w by both child and sim controlled by pause/signal * REQ written by all children, read by sim -- ASSUMES the * two byte message is written contiguous in pipe * ACK written by child read by sim in puase/signal sequence */ /* pipe fds */ #define MSGIN 10 #define MSGOUT 11 #define ACK 12 #define REQ 13 #define REQOUT 14 #define ACKOUT 15 #define HOST 0x8000 #define FREE 0 #define ANY -1 #define BUSY 1 #define UNUSED -1 #define TRUE 1 #define FALSE 0 #define MAXLTH 256000 #define MAXPID 32767 /* request types */ #define DIM 1 #define SEND 2 #define RECV 3 #define RECVW 4 #define PROBE 5 #define STAT 6 #define STATW 7 #define SYSLOG 8 #define MSGWAIT 9 #define CPROBE 10 #define LWAIT 11 #define LWAITA 12 #define PROBEMSG 13 #define GETCUBE 14 #define IPROBE 15 #define IRECV 16 #define ISEND 17 #define RELCUBE 18 #define CRECV 19 #define LKILL 20 #define LOAD 21 #define DATLTH 1000 #define MAGIC -3571 /* message header and recvq entry */ struct MsgHdr { struct MsgHdr *mh_next; /* recvq chain (must be first in struct) */ int mh_magic; /* qa */ int mh_req; /* request type */ int mh_upid; /* unix pid */ int mh_me; /* my node id */ int mh_mepid; /* my pid when i send */ int mh_node; /* node to or from */ int mh_pid; /* from pid */ int mh_type; /* msg type */ int mh_rcvlth; /* max lth on recv */ int mh_lth; /* data field lth */ char mh_data[DATLTH]; /* data*/ }; #define HDRLTH sizeof(struct MsgHdr)-DATLTH SHAR_EOF if test -f 'drive.c' then echo shar: over-writing existing file "'drive.c'" fi cat << \SHAR_EOF > 'drive.c' /* drive.c thd 8/2/88 * main control loop * read pipe looking for requests and perform */ #include #include #include "sim.h" #include "simlib.h" struct MsgHdr *prev; /* retainer for srchq */ struct MsgHdr *malloc(); char *reqnm[]; drive(){ struct MsgHdr *m, *mp, *srchq(); int n, req, lth, i, j, tfnd, nd, *ip, rcving; int d, waking, gsend; long ticks; m = malloc(HDRLTH+MAXLTH); /* sim msg recv area */ if(m==NULL){ fprintf(stderr,"mpsim init malloc failed\n"); finish(); } ip = (int *)m->mh_data; if (trace && logging){ fprintf(lf,"mpsim trace start clock 0\n"); fprintf(lf,"cnt\t%d\tclock\t%ld\n",running,0); } sttime = clock() - 20; for(;;){ /* loop forever */ ticks = clock() - sttime; for(i=0;i <= MAXNODES; i++) /* see who has died */ if (node[i].nt_state != UNUSED && kill(node[i].nt_upid,0) == -1){ running--; /*no such pid anymore */ if (trace && logging){ fprintf(lf,"texit node %d clock %d\n", i == MAXNODES ? HOST : i,ticks); fprintf(lf,"cnt\t%d\tclock\t%ld\n",running,ticks); } node[i].nt_state = UNUSED; } if (!running)finish(); n=mget(m); /* get a message request */ if (n==0)return; /* no processes */ req = m->mh_req; if (debug>99) fprintf(stderr,"mpsim read req %d\n",req); if ( m->mh_magic != MAGIC){ printf("mpsim read failed bad MAGIC\n"); finish(); } nd=m->mh_me; /* node, index into nt */ if (nd==HOST) nd = MAXNODES; tfnd=m->mh_node; /* to/from node */ if (tfnd==HOST) tfnd = MAXNODES; rcving = 0; switch(req){ /* handle request */ case DIM: /* return dim and mynode */ for(i=0;i<=MAXNODES;i++) if(node[i].nt_upid==m->mh_upid)n=i; if (n==MAXNODES) n= HOST; *ip = CubDim; *(ip+1) = node[nd].nt_mypid; m->mh_me = n; m->mh_lth = 8; if (trace && logging) fprintf(lf,"start clock %ld upid %d running %d\n", ticks,m->mh_upid,m->mh_me); msend(m->mh_upid,m); break; case RECV: case RECVW: case MSGWAIT: if (trace && logging){ rcving = 1; fprintf(lf,"%s clock %ld node %d pid %d type %d lth %d", reqnm[req],ticks,m->mh_me,m->mh_pid,m->mh_type,m->mh_rcvlth); } case STAT: case STATW: if ((mp=srchq(m,nd))==NULL) { /*nothing in my q matching*/ if (req == RECV || req == STAT) { /* no wait send back his request */ if(rcving)fprintf(lf,"\n"); msend(node[nd].nt_upid,m); break; } /* else recvw and statw must wait . . . */ node[nd].nt_state = WAIT; running--; node[nd].nt_type = m->mh_type; node[nd].nt_pid = m->mh_pid; if (rcving){ fprintf(lf," blocking %d\n",m->mh_me); fprintf(lf,"cnt\t%d\tclock\t%ld\n",running,ticks); } break; } /* else have a message, send it all */ if (rcving) fprintf(lf," from %d\n",mp->mh_me); msend(node[nd].nt_upid,mp); prev->mh_next = mp->mh_next; /* de q */ free(mp); break; case PROBE: case PROBEMSG: /* see if message in q return lth or -1 in rcvlth */ if ((mp=srchq(m,nd))== NULL) m->mh_rcvlth = -1; /*nope */ else { /* got one here, send back synopsis */ m->mh_rcvlth = mp->mh_lth; m->mh_type = mp->mh_type; m->mh_me = mp->mh_me; m->mh_mepid = mp->mh_mepid; } msend(node[nd].nt_upid,m); break; case SEND: if (trace && logging) fprintf(lf,"send clock %ld node %d fpid %d to %d pid %d type %d lth %d", ticks,m->mh_me,m->mh_mepid,m->mh_node, m->mh_pid,m->mh_type,m->mh_lth); /* see if this is a global send */ waking = gsend =0; if (tfnd < 0){ /* global send */ gsend =1; d = tfnd + CubDim + 1; /* dim of send */ if (d <= 0) { /* no send at all !! */ if (trace && logging)fprintf(lf," gsend NIL\n"); break; } n = 1<mh_me == HOST) tfnd =0; else tfnd = (m->mh_me >> d) << d; if (trace && logging){ fprintf(lf," gsend "); for (i=0,j=tfnd; i< n;i++,j++) if (j!=m->mh_me) fprintf(lf,"%d,",j); } } else n = 1; /*regular single node send */ while(n--){ /* send to n nodes */ if (gsend && m->mh_me == tfnd) { tfnd++; continue; /*not to myself */ } /* see if node is waiting for this msg */ if (node[tfnd].nt_state==WAIT && node[tfnd].nt_pid == m->mh_pid && (node[tfnd].nt_type==m->mh_type || node[tfnd].nt_type==ANY)){ /* yes, so send it to him */ msend(node[tfnd].nt_upid,m); node[tfnd].nt_state=BUSY; running++; if (waking==0 && trace && logging) fprintf(lf," waking "); waking++; if (trace && logging) fprintf(lf,"%d ",m->mh_node < 0 ? tfnd : m->mh_node); } else qend(m,tfnd); /* else no one waiting q it */ tfnd++; } /* while n nodes */ if (waking && trace && logging) fprintf(lf,"\ncnt\t%d\tclock\t%ld",running,ticks); if (trace && logging) fprintf(lf,"\n"); break; case SYSLOG: if (logging) fprintf(lf,"syslog clock %ld node %d id %d msg %s\n", ticks,m->mh_me,m->mh_mepid,m->mh_data); break; default: printf("mpsim unknown req %d\n",m->mh_req); break; } } } mget(mp) struct MsgHdr *mp; { /* get a request from children */ int upid, n; n=rdpipe(REQ,&upid,sizeof(int)); /* fetch a req upid */ if(n==0) return(0); /* no pipe means no children */ n=kill(upid,SIGTERM); /* let child write on MSG */ if (n== -1){ fprintf(stderr," pipe scrambled %d \n",upid); exit(1); } rdpipe(MSGIN,mp,HDRLTH); if (mp->mh_magic != MAGIC){ fprintf(stderr,"mpsim bad MAGIC\n"); finish(); } rdpipe(MSGIN,mp->mh_data,mp->mh_lth); return(1); } msend(upid,mp) int upid; struct MsgHdr *mp; { /* send message down the pipe -- only one guy should be reading */ int lth; lth=HDRLTH + mp->mh_lth; if(debug>99) fprintf(stderr,"msend upid %d lth %d\n",upid,lth); kill(upid,SIGTERM); /* release his pause */ write(MSGOUT,mp,lth); /* down the pipe */ rdpipe(ACK,<h,sizeof(int)); /* wait til he gets it */ } qend(mp,nd) struct MsgHdr *mp; int nd; /* node index */ { /* allocate an entry and add to end of q */ struct MsgHdr *q, *p, *prev; int lth; if (debug>100){fprintf(stderr,"qend node %d\n",nd);fflush(stderr);} lth = HDRLTH + mp->mh_lth; p = malloc(lth); if(p==NULL){ fprintf(stderr,"mpsim malloc failed %d\n",lth); finish(); } bcopy(mp,p,lth); p->mh_next= NULL; prev = (struct MsgHdr *) &node[nd]; q = node[nd].nt_recvq; while(q!=NULL){ /* find end */ prev=q; q= q->mh_next; } prev->mh_next = p; } rdpipe(pipe,buf,lth) int pipe; char *buf; int lth; { /* read lth bytes into buf from the pipe pipe * since pipes may fill sporadically we persist */ int n; if(debug>99) fprintf(stderr,"mpsim rdpipe %d lth\n",pipe,lth); while(lth){ n=read(pipe,buf,lth); if(n==0) return(0); /* other end gone */ buf += n; /* advance -- use arg since pass by val */ lth -= n; } return(1); /* ok */ } #ifndef BCOPY bcopy(src,dst,n) char *dst, *src; int n; { while(n--) *dst++ = *src++; } #endif struct MsgHdr * srchq(mp,nd) struct MsgHdr *mp; int nd; /* node of dest */ { /* see if a message is on nd's q matching his recv request */ /* NOTE SIDE EFFECT prev (global) is set for later use */ struct MsgHdr *q; if (debug>100){fprintf(stderr,"srchq node %d\n",nd);fflush(stderr);} prev= (struct MsgHdr *) &node[nd]; q = node[nd].nt_recvq; while(q!=NULL){ if ((mp->mh_pid == ANY || mp->mh_pid == q->mh_pid) && (mp->mh_type == ANY || mp->mh_type == q->mh_type)) return(q); /* found it */ prev = q; q = q->mh_next; } return(NULL); /* not there */ } SHAR_EOF if test -f 'ftnsubs.c' then echo shar: over-writing existing file "'ftnsubs.c'" fi cat << \SHAR_EOF > 'ftnsubs.c' /* ftnsubs.c * f77 interfaces for oak ridge stuff -- snd rcv hep * make fortran names and change pass by reference to pass by value */ int copen_(pidptr) int *pidptr; { return(copen(*pidptr)); } void cclose_(dptr) int *dptr; { cclose(*dptr); } void sendw_(dptr,typptr,buff,lthptr,nodeptr,pidptr) int *dptr; char *buff; int *typptr, *lthptr, *nodeptr, *pidptr; { sendw(*dptr,*typptr,buff,*lthptr,*nodeptr,*pidptr); } void sendmsg_(dptr,typptr,buff,lthptr,nodeptr,pidptr) int *dptr; char *buff; int *typptr, *lthptr, *nodeptr, *pidptr; { sendmsg(*dptr,*typptr,buff,*lthptr,*nodeptr,*pidptr); } void recv_(dptr,typptr,buff,maxptr, lthptr,nodeptr,pidptr) int *dptr; char *buff; int *maxptr; int *typptr, *lthptr, *nodeptr, *pidptr; { recv(*dptr,*typptr,buff,*maxptr,lthptr,nodeptr,pidptr); } void send_(dptr,typptr,buff,lthptr,nodeptr,pidptr) int *dptr; char *buff; int *typptr, *lthptr, *nodeptr, *pidptr; { send(*dptr,*typptr,buff,*lthptr,*nodeptr,*pidptr); } void recvw_(dptr,typptr,buff,maxptr, lthptr,nodeptr,pidptr) int *dptr; char *buff; int *maxptr; int *typptr, *lthptr, *nodeptr, *pidptr; { recvw(*dptr,*typptr,buff,*maxptr,lthptr,nodeptr,pidptr); } void recvmsg_(dptr,typptr,buff,maxptr, lthptr,nodeptr,pidptr) int *dptr; char *buff; int *maxptr; int *typptr, *lthptr, *nodeptr, *pidptr; { recvmsg(*dptr,typptr,buff,*maxptr,lthptr,nodeptr,pidptr); } int probe_(dptr,typptr) int *dptr; int *typptr; { return(probe(*dptr,*typptr)); } int probemsg_(pidptr) int *pidptr; { return(probemsg(*pidptr)); } int status_(dptr) int *dptr; { return(status(*dptr)); } int mynode_() { return(mynode()); } int mypid_() { return(mypid()); } int cubedim_() { return(cubedim()); } int clock_() { return(clock()); } void flick_() { flick(); } void syslog_(pid,msg) int *pid; char *msg; { syslog(*pid,msg); } /* new routines for INTEL */ void csend_(typptr,buff,lthptr,nodeptr,pidptr) char *buff; int *typptr, *lthptr, *nodeptr, *pidptr; { csend(*typptr,buff,*lthptr,*nodeptr,*pidptr); } void crecv_(typptr,buff,lthptr) char *buff; int *typptr, *lthptr; { crecv(*typptr,buff,*lthptr); } int irecv_(typptr,buff,lthptr) char *buff; int *typptr, *lthptr; { return(irecv(*typptr,buff,*lthptr)); } int isend_(typptr,buff,lthptr,nodeptr,pidptr) char *buff; int *typptr, *lthptr, *nodeptr, *pidptr; { return(isend(*typptr,buff,*lthptr,*nodeptr,*pidptr)); } int myhost_() { return(myhost()); } int nodedim_() { return(nodedim()); } int numnodes_() { return(numnodes()); } int mclock_() { return(mclock()); } int infonode_() { return(infonode()); } int infopid_() { return(infopid()); } int infotype_() { return(infotype()); } int infocount_() { return(infocount()); } void setpid_(pidptr) int *pidptr; { setpid(*pidptr); } int iprobe_(typeptr) int *typeptr; { return(iprobe(*typeptr)); } int msgwait_(idptr) int *idptr; { return(msgwait(*idptr)); } int msgdone_(idptr) int *idptr; { return(msgdone(*idptr)); } SHAR_EOF if test -f 'mpsim.c' then echo shar: over-writing existing file "'mpsim.c'" fi cat << \SHAR_EOF > 'mpsim.c' /* mpsim.c * intel iPSC hypercube simulator using UNIX forks and pipes * XENIX, 4.2bsd, and sequent * Sys V, Release 2.0. * T. Dunigan ORNL 12/21/85 8/2/88 * uses 3 pipes (REQ, MSGIN/OUT, ACK) * synchronized with signal/pause * Assumes 2-byte write on a pipe (REQ) is atomic */ #include #ifdef SYSV #include #endif #include #include "sim.h" #include "simlib.h" char *reqnm[] ={ "","dim","send","recv","recvw","probe","stat", "statw","syslog","msgwait","cprobe" }; int finish(), child(); main(argc,argv) int argc; char *argv[]; { int n; signal(SIGINT,finish); debug=0; if (argc>1)debug= atoi(argv[1]); init(); userin(); start(); drive(); finish(); } init() { /* set up things */ int i; int fds[2]; /* set up pipes for us and children */ pipe(fds); #ifdef SYSV close(MSGIN); close(MSGOUT); fcntl(fds[0], F_DUPFD, MSGIN); fcntl(fds[1], F_DUPFD, MSGOUT); #else dup2(fds[0],MSGIN); /* rd wrt by both host and child */ dup2(fds[1],MSGOUT); #endif close(fds[0]); close(fds[1]); pipe(fds); #ifdef SYSV close(ACK); close(ACKOUT); fcntl(fds[0], F_DUPFD, ACK); fcntl(fds[1], F_DUPFD, ACKOUT); #else dup2(fds[0],ACK); /* rd by sim, write by child */ dup2(fds[1],ACKOUT); /* will be close later */ #endif close(fds[0]); close(fds[1]); pipe(fds); #ifdef SYSV close(REQ); close(REQOUT); fcntl(fds[0], F_DUPFD, REQ); fcntl(fds[1], F_DUPFD, REQOUT); #else dup2(fds[0],REQ); /* write by all children rd by host */ dup2(fds[1],REQOUT); /* will be closed later */ #endif close(fds[0]); close(fds[1]); strcpy(logfile,LOGNM); trace = 0; for(i=0;i <= MAXNODES;i++){ *node[i].nt_filnam = 0; node[i].nt_state = UNUSED; node[i].nt_recvq = NULL; } } start() { /* step thru fork tbl and start subprocesses */ int i; lf = NULL; running=0; for(i = MAXNODES;i >= 0;i--){ if (node[i].nt_filnam[0] != 0){ running++; node[i].nt_upid=spawn(node[i].nt_filnam); node[i].nt_state = BUSY; } } /* children running so . . . */ /* get rid of ends of pipes we don't use */ close(ACKOUT); close(REQOUT); /* open logfile */ if (logging){ if (strcmp(logfile,"stdout")==0) lf = stdout; else lf = fopen(logfile,"w"); } if (lf == NULL) logging=0; /* open failed? */ } spawn(filename) char *filename; { /* start a child process */ int pid; if ((pid=fork())==0){ /* child */ /* set up child pipes to use write ends of ACK and REQ */ #ifdef SYSV close(ACK); fcntl(ACKOUT, F_DUPFD, ACK); #else dup2(ACKOUT,ACK); #endif close(ACKOUT); #ifdef SYSV close(REQ); fcntl(REQOUT, F_DUPFD, REQ); #else dup2(REQOUT,REQ); #endif close(REQOUT); execl(filename,filename,0); printf(" mpsim unable to start %s\n",filename); exit(1); } if (debug>50) fprintf(stderr,"spawn %s upid %d\n",filename,pid); if (pid < 0){ fprintf(stderr,"mpsim too many forks\n"); finish(); } return(pid); } log() { } finish() { /* catch interrupts and clean up */ int i; printf(" mpsim exiting\n"); if (lf == NULL) logging=0; /*catch early exit */ if (trace & logging) fprintf(lf,"cnt\t%d\tclock\t%ld\n",0,clock()-sttime); if (logging)fclose(lf); for(i=0;i <= MAXNODES;i++) if (node[i].nt_state != UNUSED) kill(node[i].nt_upid,SIGKILL); if (debug==1)dump(); exit(0); } dump() { /* dump sim tables */ int i,j; struct MsgHdr *mp; printf(" %d running\n",running); for (i=0;i <= MAXNODES;i++){ if (node[i].nt_filnam[0]==0)continue; printf("\nnode %d upid %d state %d file %s",i, node[i].nt_upid,node[i].nt_state,node[i].nt_filnam); printf(" type %d pid %d\n",node[i].nt_type,node[i].nt_pid); printf(" recvq %lx bldptr %lx\n",node[i].nt_recvq, node[i].nt_bldptr); mp = node[i].nt_recvq; while(mp!=NULL){ printf("\tmagic %d req %d upid %d me %d mepid %d\n", mp->mh_magic,mp->mh_req,mp->mh_upid,mp->mh_me, mp->mh_mepid); printf("\t node %d pid %d type %d\n", mp->mh_node,mp->mh_pid,mp->mh_type); printf("\t nextq %lx lth %d ",mp->mh_next,mp->mh_lth); for(j=0;j<10;j++)printf(" %x",mp->mh_data[j]); printf("\n\n"); mp = mp->mh_next; } printf("\n"); } } #include #ifdef SYSV #include #include #else #include #endif long clock() { /* return milliseconds of wall clock time */ #ifdef SYSV struct tms t; #else struct timeb t; #endif long v; #ifdef SYSV long times(); v = 1000 * times(&t)/ HZ; #else ftime(&t); v = 1000*t.time + t.millitm; #endif return(v); } SHAR_EOF if test -f 'simlib.c' then echo shar: over-writing existing file "'simlib.c'" fi cat << \SHAR_EOF > 'simlib.c' /* simlib.c * simulator library routines * T. Dunigan ORNL 1/18/86 8/2/88 * keep names private (static) * uses 4 file descriptors and a signal */ #include #include #include "simlib.h" #ifdef SYSV #define PAUSE #endif #ifdef XENIX #define PAUSE #endif #define STARTED if (CubDim == -1) init(); #define CHKCI chkci(ci); #define CHKTYPE ; #define CHKPID ; #define CHKLTH chklth(lth); #define CHKNODE chknode(node); static struct MsgHdr *mp; struct MsgHdr *malloc(); #define MAXCI 8 static struct CiTable { int ct_pid; /* user pid */ int ct_stat; /* ci stat: busy free unused */ int ct_req; /* current request */ int ct_maxlth; int ct_type; int ct_stype, ct_node, ct_spid, ct_lth; int *ct_lthp; /* return value ponters */ char *ct_bufp; int *ct_nodep; int *ct_pidp; } ct[MAXCI]; static int *ip; static catch(),init(),getreq(),sndreq(),rdpipe(),busyci(); static chklth(),chknode(),chkci(),bcopy(); static int CubDim = -1; static int upid, maxnode, proceed, me, mepid; static int info_count, info_node, info_pid, info_type; /*last recv info */ copen(pid) int pid; { int i; STARTED CHKPID for(i=0;i= maxnode){ fprintf(stderr,"node %d bad dest node %d\n",me,node); exit(1); } mp->mh_mepid = ct[ci].ct_pid; mp->mh_node = node; /* dest node */ mp->mh_pid = pid; /* dest pid */ mp->mh_lth =lth; mp->mh_type = type; bcopy(buf,mp->mh_data,lth); sndreq(SEND); } send(ci,type,buf,lth,node,pid) int ci,type,lth,node,pid; char *buf; { sendw(ci,type,buf,lth,node,pid); } sendmsg(ci,type,buf,lth,node,pid) int ci,type,lth,node,pid; char *buf; { sendw(ci,type,buf,lth,node,pid); } recvw(ci,type,buf,maxlth,lth,node,pid) int ci,type,maxlth,*lth,*node,*pid; char *buf; { /* blocking recv */ int l; STARTED CHKCI busyci(ci); CHKTYPE mp->mh_pid = ct[ci].ct_pid; /* has to come to my pid */ mp->mh_type = type; mp->mh_lth = 0; mp->mh_rcvlth = maxlth; /*for trace */ getreq(RECVW); /* request message and wait */ l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,buf, lmh_me; *pid = mp->mh_mepid; } recv(ci,type,buf,maxlth,lth,node,pid) int ci,type,maxlth,*lth,*node,*pid; char *buf; { /* non-blocking recv -- see if a mesg is there otherwise * save args so stat can ask again */ int l; STARTED CHKCI busyci(ci); CHKTYPE mp->mh_pid = ct[ci].ct_pid; /* has to come to my pid */ mp->mh_type = type; mp->mh_lth = 0; mp->mh_rcvlth = maxlth; /*for trace */ getreq(RECV); /* request message and wait */ /* either got msg req==SEND or our own msg back*/ if (mp->mh_req != SEND){ /* no msg yet -- save args for later use by stat */ ct[ci].ct_type = type; ct[ci].ct_maxlth = maxlth; ct[ci].ct_lthp = lth; /* save pointers */ ct[ci].ct_bufp = buf; ct[ci].ct_nodep = node; ct[ci].ct_pidp =pid; ct[ci].ct_stat = BUSY; /* show ci in use */ return; } /* else have a msg -- copy to caller */ l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,buf, lmh_me; *pid = mp->mh_mepid; ct[ci].ct_stype = mp->mh_type; ct[ci].ct_stat = FREE; } recvmsg(ci,type,buf,maxlth,lth,node,pid) int ci,*type,maxlth,*lth,*node,*pid; char *buf; { /* blocking recv */ int l; STARTED CHKCI busyci(ci); mp->mh_pid = ct[ci].ct_pid; /* has to come to my pid */ mp->mh_type = ANY; mp->mh_rcvlth = maxlth; /* for trace info */ mp->mh_lth = 0; getreq(RECVW); /* request message and wait */ l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,buf, lmh_me; *pid = mp->mh_mepid; *type = mp->mh_type; } probe(ci,type) int ci; int type; { /* see if a message is waiting, return lth if yes, else -1 */ STARTED CHKCI CHKTYPE mp->mh_pid = ct[ci].ct_pid; /* has to come to my pid */ mp->mh_type = type; mp->mh_lth = 0; mp->mh_rcvlth = 0; getreq(PROBE); /* request message and wait */ return(mp->mh_rcvlth); } probemsg(pid) int pid; { /* see if a message is waiting, return 1 if yes, else 0 */ STARTED mp->mh_pid = pid; mp->mh_type = ANY; mp->mh_lth = 0; mp->mh_rcvlth = 0; getreq(PROBEMSG); /* request message and wait */ return(mp->mh_rcvlth== -1 ? 0 : 1); } status(ci) int ci; { int l; STARTED CHKCI if (ct[ci].ct_stat == FREE) return(FREE); /* otherwise recv outstanding, ask sim about status*/ /*get poop from ci tbl */ mp->mh_pid = ct[ci].ct_pid; /* has to come to my pid */ mp->mh_type = ct[ci].ct_type; mp->mh_lth = 0; mp->mh_rcvlth = ct[ci].ct_maxlth; getreq(STAT); /*will get back no or the message */ if (mp->mh_req != SEND) return(BUSY); /* still nothing */ /* else have a msg -- copy to caller */ l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,ct[ci].ct_bufp,lmh_me; *ct[ci].ct_pidp = mp->mh_mepid; ct[ci].ct_stat = FREE; return(FREE); } syslog(n,s) int n; char *s; { /* log string */ STARTED mp->mh_mepid = n; mp->mh_lth = strlen(s) + 1; strcpy(mp->mh_data,s); sndreq(SYSLOG); } #include #ifdef SYSV #include #include #else #include #endif long clock() { /* return milliseconds of wall clock time */ #ifdef SYSV struct tms t; #else struct timeb t; #endif long v; #ifdef SYSV long times(); v = 1000 * times(&t)/ HZ; #else ftime(&t); v = 1000*t.time + t.millitm; #endif return(v); } flick(){} cubedim() { STARTED return(CubDim); } mynode() { STARTED return(me); } mypid() { return(0); /* for now */ } static init() { /* set up first time */ int i; signal(SIGTERM,catch); upid=getpid(); proceed=0; mp = malloc(sizeof(struct MsgHdr)+MAXLTH); if (mp==0){ fprintf(stderr," malloc in sim upid %d\n",upid); exit(1); } ip = (int *)mp->mh_data; /* int overlay */ mp->mh_magic = MAGIC; mp->mh_upid=upid; mp->mh_lth = 0; getreq(DIM); /* fetch dim and mynode*/ CubDim = *ip++; mepid = *ip; me = mp->mh_me; maxnode = 1<mh_magic != MAGIC){ fprintf(stderr,"node %d upid %d bad MAGIC\n",me,upid); exit(1); } rdpipe(MSGIN,mp->mh_data,mp->mh_lth); write(ACK,&n,sizeof(int)); /* tell sim we got poop */ } static sndreq(req) int req; { /* send a request to sim */ proceed=0; write(REQ,&upid,sizeof(int)); /* make a request application */ #ifdef PAUSE if(!proceed) pause(); /* let sim tell us when to go */ #else if(!proceed) sigpause(0); /* let sim tell us when to go */ #endif proceed=0; mp->mh_req = req; mp->mh_me = me; write(MSGOUT,mp,HDRLTH + mp->mh_lth); /* up the pipe */ } static rdpipe(pipe,buf,lth) int pipe; char *buf; int lth; { /* read lth bytes into buf from the pipe pipe * since pipes may fill sporadically we persist */ int n; while(lth){ n=read(pipe,buf,lth); if(n==0) return(0); /* other end gone */ buf += n; /* advance -- use arg since pass by val */ lth -= n; } return(1); /* ok */ } static busyci(ci) int ci; { int l; /* if ci is busy with a recv then we must wait */ if (ct[ci].ct_stat == FREE) return; /* else make us a STATW request */ mp->mh_pid = ct[ci].ct_pid; /* has to come to my pid */ mp->mh_type = ct[ci].ct_type; mp->mh_lth = 0; mp->mh_rcvlth = ct[ci].ct_maxlth; getreq(STATW); /*will wait for msg now ! */ /* have a msg -- copy to caller */ l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,ct[ci].ct_bufp,lmh_me; *ct[ci].ct_pidp = mp->mh_mepid; ct[ci].ct_stat = FREE; } static chklth(lth) int lth; { if (lth >= 0 && lth <= MAXLTH ) return; fprintf(stderr,"node %d bad length %d\n",me,lth); exit(1); } static chknode(node) int node; { if (node == HOST ||(node >= 0 && node < maxnode))return; fprintf(stderr,"node %d bad dest node %d\n",me,node); exit(1); } static chkci(ci) int ci; { if (ci >= 0 && ci < MAXCI && ct[ci].ct_stat != UNUSED)return; fprintf(stderr,"node %d bad channel %d\n",me,ci); exit(1); } static catch() { /* catch signal from sim */ proceed = 1; signal(SIGTERM,catch); } #ifndef BCOPY static bcopy(src,dst,n) char *dst, *src; int n; { while(n--) *dst++ = *src++; } #endif /* v2 stuff 8/2/88 */ infocount() { STARTED return(info_count); } infonode() { STARTED return(info_node); } infopid() { STARTED return(info_pid); } infotype() { STARTED return(info_type); } mclock() { return(clock()); } myhost() { STARTED return(HOST); } nodedim() { STARTED return(CubDim); } numnodes() { STARTED return(maxnode); } setpid(pid) int pid; { STARTED mepid = pid; } crecv(type,buf,maxlth) int type,maxlth; char *buf; { int l; STARTED mp->mh_pid = mepid; mp->mh_type = type; mp->mh_lth = 0; mp->mh_rcvlth = maxlth; /*for trace */ getreq(RECVW); /* request message and wait */ l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,buf, lmh_me; info_pid = mp->mh_mepid; info_type = mp->mh_type; if (l > maxlth) fprintf(stderr, "Warning recvd lth %d exceeds buffer %d on node %d\n",l,maxlth,me); } csend(type,buf,lth,node,pid) int type,lth,node,pid; char *buf; { STARTED CHKTYPE CHKPID CHKLTH CHKNODE mp->mh_mepid = mepid; mp->mh_node = node; /* dest node */ mp->mh_pid = pid; /* dest pid */ mp->mh_lth =lth; mp->mh_type = type; bcopy(buf,mp->mh_data,lth); sndreq(SEND); } #define ISNDID -1234 int isend(type,buf,lth,node,pid) int type,lth,node,pid; char *buf; { csend(type,buf,lth,node,pid); return( ISNDID ); /*dummy id -- e.g. buffer ready for resuse */ } int irecv(type,buf,maxlth) int type,maxlth; char *buf; { int ci; ci = copen(mepid); /* id for msgwait msgdone */ recv(ci,type,buf,maxlth,&ct[ci].ct_lth,&ct[ci].ct_node,&ct[ci].ct_spid); return(ci); } cprobe(type) int type; { STARTED /* worry about msg sitting in ct table from a irecv ? */ /* race condition???? */ fprintf(stderr,"cprobe not implemented yet\n"); exit(1); mp->mh_pid = mepid; mp->mh_type = type; mp->mh_lth = 0; getreq(CPROBE); info_count = mp->mh_lth; /* message length */ info_node = mp->mh_me; info_pid = mp->mh_mepid; info_type = mp->mh_type; /* note, mesg is still in our q, recv must be issued to get it */ } int iprobe(type) int type; { STARTED /* worry about msg sitting in ct table from a irecv ? */ mp->mh_pid = mepid; mp->mh_type = type; mp->mh_lth = 0; getreq(PROBE); if (mp->mh_rcvlth == -1) return(FALSE); /* none there yet */ /* else a msg is there */ info_count = mp->mh_lth; /* message length */ info_node = mp->mh_me; info_pid = mp->mh_mepid; info_type = mp->mh_type; return(TRUE); } msgwait(ci) int ci; { int l,maxlth; if (ci == ISNDID) return(1); /*isend done */ STARTED CHKCI if (ct[ci].ct_stat == FREE){ /* recv previously completed */ info_count = ct[ci].ct_lth; info_type = ct[ci].ct_stype; info_pid = ct[ci].ct_spid; info_node = ct[ci].ct_node; } else { /* request and wait */ mp->mh_pid = mepid; mp->mh_type = ct[ci].ct_type; mp->mh_lth = 0; mp->mh_rcvlth = maxlth = ct[ci].ct_maxlth; /*for trace */ getreq(MSGWAIT); l = mp->mh_lth; /* message length */ bcopy(mp->mh_data,ct[ci].ct_bufp, lmh_me; info_pid = mp->mh_mepid; info_type = mp->mh_type; } if (info_count > ct[ci].ct_maxlth) fprintf(stderr, "Warning recvd lth %d exceeds buffer %d on node %d\n",info_count, ct[ci].ct_maxlth,me); cclose(ci); /* release id */ } int msgdone(ci) int ci; { if (ci == ISNDID) return(1); /*isend done */ STARTED CHKCI if (ct[ci].ct_stat == FREE){ /* recv previously completed */ info_count = ct[ci].ct_lth; info_type = ct[ci].ct_stype; info_pid = ct[ci].ct_spid; info_node = ct[ci].ct_node; } else { /* check with sim */ mp->mh_pid = mepid; mp->mh_type = ct[ci].ct_type; mp->mh_lth = 0; mp->mh_rcvlth = ct[ci].ct_maxlth; /*for trace */ getreq(STAT); if (mp->mh_req != SEND) return(FALSE); /* nothing yet --> */ /* else have the msg now */ info_count = mp->mh_lth; /* message length */ bcopy(mp->mh_data,ct[ci].ct_bufp,info_count < ct[ci].ct_maxlth ? info_count : ct[ci].ct_maxlth); info_node = mp->mh_me; info_pid = mp->mh_mepid; info_type = mp->mh_type; } if (info_count > ct[ci].ct_maxlth) fprintf(stderr, "Warning recvd lth %d exceeds buffer %d on node %d\n",info_count, ct[ci].ct_maxlth,me); cclose(ci); /* release id */ return(TRUE); } SHAR_EOF if test -f 'userin.c' then echo shar: over-writing existing file "'userin.c'" fi cat << \SHAR_EOF > 'userin.c' /* userin.c * get user input * T. Dunigan ORNL 1/22/86 8/2/88 */ #include #include #include "sim.h" userin() { int i,n,using,inpid; char line[200]; char *p, *nxtword(), *getword(); FILE *in; using=0; in = stdin; /*default command input */ trace=0; logging=0; printf(" mpsim v2.0 8/2/88\n"); printf("mpsim> "); while(1){ if (fgets(line,sizeof(line),in)==NULL){ /* end of file on input -- switch to stdin or quit */ if (using) { fclose(in); in = stdin; /* back to tty in */ using=0; printf("smpsim> "); continue; /*go read input again */ } else break; /*quit */ } p=getword(line); switch(*p){ case 'c': p=nxtword(); if (*p == 0) { logging=0; printf(" logging disabled\n"); break; } logging=1; strcpy(logfile,p); printf(" logging to %s\n",logfile); break; case 'h': case 'm': p=nxtword(); if (filchk(p)) break; strcpy(node[MAXNODES].nt_filnam,p); break; case 'l': n= -1; inpid = 0; p=nxtword(); while(*p){ if (*p == '-')switch (*++p){ case 'c': CubDim=atoi(nxtword()); if (CubDim<0 || CubDim>MAXDIM)CubDim=0; printf(" dimension %d cube\n",CubDim); break; case 'n': n=atoi(nxtword()); break; case 'p': inpid = atoi(nxtword()); break; default: printf("?mpsim unknown switch %s\n",*p); break; } /* switch */ else { /* filename */ if (filchk(p)) break; if(n<0 || n>=(1< "); } exit(0); } filchk(s) char *s; { if(access(s,5)==0)return(0); printf("Unable to access %s\n",s); return(1); } static char *nxtwdp, word_str[100]; /*locals for getword and nxtword*/ char *getword(s) char *s; { char *p; while( *s && isspace(*s))s++; p=word_str; while(*s && !isspace(*s)) *p++ = *s++; *p=0; nxtwdp=s; return(word_str); } char *nxtword() { char *p; while(*nxtwdp && isspace(*nxtwdp)) nxtwdp++; p=word_str; while(*nxtwdp && !isspace(*nxtwdp)) *p++ = *nxtwdp++; *p=0; return(word_str); } SHAR_EOF if test -f 'f286toc.f' then echo shar: over-writing existing file "'f286toc.f'" fi cat << \SHAR_EOF > 'f286toc.f' $interface(C=_cubedim,_syslog,_copen,_cclose,_probe,_status,_mynode) $interface(C=_clock,_flick,_send,_sendw,_sendmsg,_recv,_recvw,_recvmsg) C f286toc.f C fortran interface to ornl simulator -- Xenix on Intel integer function cubedim() integer _cubedim cubedim = _cubedim() end subroutine syslog(i,j) character*(*) j character*200 tmp equivalence (k,tmp) print *,len(j) tmp = j // char(0) call _syslog(%val(i),k) end integer function copen(pid) integer pid,_copen copen = _copen(%val(pid)) end subroutine cclose(p) integer p call _cclose(%val(p)) end subroutine sendw(a,b,c,d,e,f) integer a,b,c,d,e,f call _sendw(%val(a),%val(b),c,%val(d),%val(e),%val(f)) end subroutine sendmsg(a,b,c,d,e,f) integer a,b,c,d,e,f call _sendmsg(%val(a),%val(b),c,%val(d),%val(e),%val(f)) end subroutine send(a,b,c,d,e,f) integer a,b,c,d,e,f call _send(%val(a),%val(b),c,%val(d),%val(e),%val(f)) end subroutine recvw(a,b,c,d,e,f,g) integer a,b,c,d,e,f,g call _recvw(%val(a),%val(b),c,%val(d),e,f,g) end subroutine recv(a,b,c,d,e,f,g) integer a,b,c,d,e,f,g call _recv(%val(a),%val(b),c,%val(d),e,f,g) end subroutine recvmsg(a,b,c,d,e,f,g) integer a,b,c,d,e,f,g call _recvmsg(%val(a),b,c,%val(d),e,f,g) end integer function probe(a,b) integer a,b,_probe probe = _probe(%val(a),%val(b)) end integer function status(p) integer p,_status status = _status(%val(p)) end integer function mynode() integer _mynode mynode = _mynode() end integer*4 function clock() integer*4 _clock clock = _clock() end subroutine flick() call _flick() end SHAR_EOF if test -f 'seqftn.f' then echo shar: over-writing existing file "'seqftn.f'" fi cat << \SHAR_EOF > 'seqftn.f' $SYSTEM c c fortran interface for sequent fortran for mpsim c compile with -e c this interface rearranges arguments and calls c interface c routines which will fix call by value etc. c integer function copen(pid) integer pid,_copen_ copen = _copen_(pid) end subroutine cclose(p) integer p call _cclose_(p) end subroutine sendw(a,b,c,d,e,f) integer a,b,c,d,e,f call _sendw_(f,e,d,c,b,a) end subroutine sendmsg(a,b,c,d,e,f) integer a,b,c,d,e,f call _sendmsg_(f,e,d,c,b,a) end subroutine send(a,b,c,d,e,f) integer a,b,c,d,e,f call _send_(f,e,d,c,b,a) end subroutine recvw(a,b,c,d,e,f,g) integer a,b,c,d,e,f,g call _recvw_(g,f,e,d,c,b,a) end subroutine recv(a,b,c,d,e,f,g) integer a,b,c,d,e,f,g call _recv_(g,f,e,d,c,b,a) end subroutine recvmsg(a,b,c,d,e,f,g) integer a,b,c,d,e,f,g call _recvmsg_(g,f,e,d,c,b,a) end integer function cubedim() integer _cubedim_ cubedim = _cubedim_() end integer function probe(a,b) integer a,b,_probe_ probe = _probe_(b,a) end integer function probemsg(a) integer a,_probemsg_ probemsg = _probemsg_(a) end integer function status(p) integer p,_status_ status = _status_(p) end integer function mynode() integer _mynode_ mynode = _mynode_() end integer function mypid() integer _mypid_ mypid = _mypid_() end integer function clock() integer _clock_ clock = _clock_() end subroutine flick() call _flick_() end subroutine syslog(a,b,c) integer a,c,i integer*1 b(*),tmp(200) do 10 i=1,iaddr(c) 10 tmp(i)= b(i) tmp(iaddr(c)+1)=0 call _syslog_(tmp,a) end C intel version 2 stuff integer function myhost() integer _myhost_ myhost = _myhost_() end integer function mclock() integer _mclock_ mclock = _mclock_() end integer function infonode() integer _infonode_ infonode = _infonode_() end integer function infopid() integer _infopid_ infopid = _infopid_() end integer function infotype() integer _infotype_ infotype = _infotype_() end integer function infocount() integer _infocount_ infocount = _infocount_() end integer function numnodes() integer _numnodes_ numnodes = _numnodes_() end integer function nodedim() integer _nodedim_ nodedim = _nodedim_() end subroutine crecv(a,b,c) integer a,b,c call _crecv_(c,b,a) end subroutine csend(a,b,c,d,e) integer a,b,c,d,e call _csend_(e,d,c,b,a) end integer function isend(a,b,c,d,e) integer _isend_,a,b,c,d,e isend = _isend_(e,d,c,b,a) end integer function irecv(a,b,c) integer _irecv_,a,b,c irecv = _irecv_(c,b,a) end integer function msgwait(p) integer p,_msgwait_ msgwait = _msgwait_(p) end integer function msgdone(p) integer p,_msgdone_ msgdone = _msgdone_(p) end integer function iprobe(p) integer p,_iprobe_ iprobe = _iprobe_(p) end subroutine setpid(p) integer p call _setpid_(p) end SHAR_EOF cd .. if test -f 'cholhv2.c' then echo shar: over-writing existing file "'cholhv2.c'" fi cat << \SHAR_EOF > 'cholhv2.c' #include "chol.h" /* Message types used (defined in globals.h): I : problem info M : map B : right hand side vector */ /* Order of matrix is n. Columns of matrix and components of rhs and solution are numbered 0, 1, ... , n-1. Nodes are numbered 0, 1, ..., np-1. */ main() /* host process */ { CHNL ci ; REAL ti[5], mn[5], mx[5] ; int np, n, whichmap, whichord, whichcomm, maxblk, blksize, i, j, type, cnt, node, pid, info[4] ; int *map ; static char *ordering[] = { "natural", "gray" } ; static char *mapping[] = { "wrap", "reflect" } ; static char *commun[] = { "bcube", "bcast", "ring" } ; setpid(PID); while (1) { printf("Enter number of processors:") ; scanf("%d", &np) ; printf("Enter order of matrix:") ; scanf("%d", &n) ; if ( np == 0 || n == 0 ) break ; if ( n < np ) break ; maxblk = n/np + ( n%np ? 1 : 0 ) ; printf("Enter wrap(0) or reflect(1) mapping:") ; scanf("%d", &whichmap) ; printf("Enter natural(0) or gray(1) ordering:") ; scanf("%d", &whichord) ; printf("Enter blocksize (1-%d):", maxblk) ; scanf("%d", &blksize) ; printf("Enter bcube(0), bcast(1), or ring(2) communication:") ; scanf("%d", &whichcomm) ; printf("Number of processors = %d, order of matrix = %d\n", np, n) ; printf("%s mapping, %s ordering, blocksize = %d, %s communication\n", mapping[whichmap], ordering[whichord], blksize, commun[whichcomm]) ; info[0] = np ; info[1] = n ; info[2] = whichord ; info[3] = whichcomm ; csend ( I, info, 4*sizeof(int), 0, PID ) ; map = (int *)malloc(n*sizeof(int)) ; whichmap = (whichmap<<1) + whichord ; setmap ( whichmap, map, n, np, blksize ) ; csend ( M, map, n*sizeof(int), 0, PID ) ; for ( j = 0 ; j < 5 ; j++ ) { mn[j] = 1.0e+20 ; mx[j] = -1.0e+20 ; } for ( i = 0 ; i < np ; i++ ) { crecv( -1, ti, 5*sizeof(float)); node = infonode(); printf("node %d, fac =%7.2f, fslv =%7.2f, bslv =%7.2f", node, (ti[1]-ti[0])/1000, (ti[2]-ti[1])/1000, (ti[3]-ti[2])/1000) ; printf(" maxerr = %e\n", ti[4] ) ; for ( j = 1 ; j < 5 ; j++ ) { if (j==4)mx[j] = ti[j] > mx[j] ? ti[j] : mx[j] ; else mx[j-1] = (ti[j]-ti[j-1])> mx[j-1] ? (ti[j]-ti[j-1]) : mx[j-1]; } } printf("totals , fac =%7.2f, fslv =%7.2f, bslv =%7.2f", mx[0]/1000, mx[1]/1000, mx[2]/1000) ; printf(" maxerr = %e\n", mx[4] ) ; free(map) ; } } setmap ( whichmap, map, n, np, blksize ) int whichmap, *map, n, np, blksize; { int i, gray(); switch (whichmap) { case 0: /* wrap */ for ( i = 0 ; i < n ; i++ ) map[i] = (i/blksize)%np; break; case 1: /* gray wrap */ for ( i = 0 ; i < n ; i++ ) map[i] = gray((i/blksize)%np); break; case 2: /* reflect */ for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2 ? np-1 - (i/blksize)%np : (i/blksize)%np; break; case 3: /* gray reflect */ for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2 ? gray(np-1 - (i/blksize)%np) : gray((i/blksize)%np); break; } } int gray (i) int i ; { return( (i>>1)^i ) ; } SHAR_EOF if test -f 'cholnv2.c' then echo shar: over-writing existing file "'cholnv2.c'" fi cat << \SHAR_EOF > 'cholnv2.c' #include "chol.h" main() /* node process */ { CHNL ci ; long mclock() ; REAL ti[5], maxerr ; int n, i, j, k, maxlth, ncols, np, me, cnt, node, pid, whichord, whichcomm, info[4] ; int *map, *collth, *mycols ; REAL *b, **col, *colk, *p, *q, *y ; REAL x, t ; me = mynode() ; while (1) { /* receive problem data */ crecv ( I, info, 4*sizeof(int)); np = info[0] ; n = info[1] ; whichord = info[2] ; whichcomm = info[3] ; if ( me == 0 ) for ( i = 1 ; i < np ; i++ ) csend ( I, info, 4*sizeof(int), i, PID ) ; if ( np == 0 || n == 0 ) break ; maxlth = n*sizeof(REAL) ; map = (int *)malloc(n*sizeof(int)) ; crecv ( M, map, n*sizeof(int)); if ( me == 0 ) for ( i = 1 ; i < np ; i++ ) csend ( M, map, n*sizeof(int), i, PID ) ; ncols = 0 ; for ( i = 0 ; i < n; i++ ) if ( map[i] == me ) ncols++ ; /* allocate storage */ collth = (int *)malloc(ncols*sizeof(int)) ; mycols = (int *)malloc(ncols*sizeof(int)) ; b = (REAL *)malloc(n*sizeof(REAL)) ; col = (REAL * *)malloc(ncols*sizeof(REAL *)) ; colk = (REAL *)malloc(n*sizeof(REAL)) ; y = (REAL *)malloc(ncols*sizeof(REAL)) ; /* set up data structures for problem to be solved */ j = 0 ; for ( k = 0 ; k < n ; k++ ) if ( map[k] == me ) { p = (REAL *)malloc((n-k)*sizeof(REAL)) ; col[j] = p ; mycols[j] = k ; collth[j] = n-k ; j++ ; *p++ = 2 ; if ( n-k > 1 ) *p++ = -1 ; for ( i = 2 ; i < n-k ; i++ ) *p++ = 0 ; } for ( i = 0 ; i < n ; i++ ) b[i] = 0 ; b[n-1] = n+1 ; maxerr = 0 ; ti[0] = mclock(0) ; /* Cholesky factorization */ j = 0 ; if ( map[0] == me ) { t = 1.0/sqrt(*col[j]) ; for ( p = col[j] ; p < col[j] + collth[j] ; p++ ) *p *= t ; j++ ; comm ( ci, 0, col[0], n*sizeof(REAL), map[0], np, FORWARD, whichord, whichcomm ) ; } for ( k = 0 ; k < n ; k++ ) { crecv( k, colk, maxlth); cnt=infocount(); if ( whichcomm != 1 && me != map[k] ) comm ( ci, k, colk, cnt, map[k], np, FORWARD, whichord, whichcomm ) ; for ( i = j ; i < ncols ; i++ ) { for ( p = col[i] , q = colk + mycols[i]-k , t = *q ; p < col[i] + collth[i] ; p++ , q++ ) *p -= *q * t ; if ( k == mycols[j]-1 ) { t = 1.0/sqrt(*col[j]) ; for ( p = col[j] ; p < col[j] + collth[j] ; p++ ) *p *= t ; comm ( ci, mycols[j], col[j], collth[j]*sizeof(REAL), me, np, FORWARD, whichord, whichcomm ) ; j++ ; } } } ti[1] = mclock(0) ; /* forward substitution */ j = 0 ; for ( k = 0 ; k < n ; k++ ) { if ( map[k] == me ) { if ( k > 0 ) crecv ( B, b, maxlth); y[j] = b[k] / *col[j] ; for ( i = k+1 ; i < n ; i++ ) b[i] -= y[j] * *(col[j]+i-k) ; if ( k < n-1 ) csend ( B, b, n*sizeof(REAL), map[k+1], PID ) ; j++ ; } } ti[2] = mclock(0) ; /* back substitution */ j = ncols-1 ; if ( map[n-1] == me ) { x = y[j] / *col[j] ; if ( maxerr < fabs(x-n) ) maxerr = fabs(x-n) ; j-- ; comm ( ci, n-1, &x, sizeof(REAL), map[n-1], np, BACKWARD, whichord, whichcomm ) ; } for ( k = n-1 ; k >= 0 ; k-- ) { crecv( k, &x, sizeof(REAL)); cnt=infocount(); if ( whichcomm != 1 && me != map[k] ) comm ( ci, k, &x, cnt, map[k], np, BACKWARD, whichord, whichcomm ) ; for ( i = j ; i >= 0 ; i-- ) { y[i] -= x * *(col[i]+k-mycols[i]) ; if ( k == mycols[j]+1 ) { x = y[j] / *col[j] ; if ( maxerr < fabs(x-k) ) maxerr = fabs(x-k) ; comm ( ci, mycols[j], &x, sizeof(REAL), me, np, BACKWARD, whichord, whichcomm ) ; j-- ; } } } ti[3] = mclock(0) ; ti[4] = maxerr ; csend ( 0, ti, 5*sizeof(float), HOST, PID ) ; /* free storage */ for ( j = 0 ; j < ncols ; j++ ) free(col[j]) ; free(y) ; free(colk) ; free(col) ; free(b) ; free(mycols) ; free(collth) ; free(map) ; } } comm ( ci, msgtype, vec, bytes, root, np, dir, ord, which ) CHNL ci ; int msgtype, bytes, root, np, dir, ord, which ; char *vec ; /* * Communication driver */ { switch (which) { case 0: bcube( ci, msgtype, vec, bytes, root, np ) ; break ; case 1: bcast( ci, msgtype, vec, bytes, root, np, dir, ord ) ; break ; case 2: ring( ci, msgtype, vec, bytes, root, np, dir ) ; break ; } } bcube ( ci, msgtype, vec, bytes, root, np ) CHNL ci ; int msgtype, bytes, root, np ; char *vec ; /* * Broadcast vector, vec, of length bytes * to all processors using a minimum spanning * tree with given root. */ { int i, me, cnt, node, pid ; me = mynode()^root ; for ( i = np/2 ; i > me ; i /= 2 ) csend ( msgtype, vec, bytes, (me+i)^root, PID ) ; if ( me == 0 ) csend ( msgtype, vec, bytes, root, PID ) ; } bcast ( ci, msgtype, vec, bytes, root, np, dir, ord ) CHNL ci ; int msgtype, bytes, root, np, dir, ord ; char *vec ; /* * Broadcast vector, vec, of length bytes */ { int i, cnt, node, pid, gray(), invgray() ; switch (ord) { case NATURAL: if ( dir == FORWARD ) { for ( i = 0 ; i < np ; i++ ) csend ( msgtype, vec, bytes, (root+i)%np, PID ) ; } else { for ( i = 0 ; i < np ; i++ ) csend ( msgtype, vec, bytes, (root+np-i)%np, PID ) ; } break ; case GRAY: if ( dir == FORWARD ) { for ( i = 0 ; i < np ; i++ ) { csend ( msgtype, vec, bytes, succ(root,i,np), PID ) ; } } else { for ( i = 0 ; i < np ; i++ ) csend ( msgtype, vec, bytes, pred(root,i,np), PID ) ; } break ; } } ring ( ci, msgtype, vec, bytes, root, np, dir ) CHNL ci ; int msgtype, bytes, root, np, dir ; char *vec ; /* * Send vector, vec, of length bytes to * all processors, using an embedded ring * beginning at root. */ { int me, next, cnt, node, pid, pred(), succ() ; me = mynode() ; switch (dir) { case FORWARD: csend ( msgtype, vec, bytes, succ(me,1,np), PID ) ; break ; case BACKWARD: csend ( msgtype, vec, bytes, pred(me,1,np), PID ) ; break ; } } int pred ( node, i, np ) int node, i, np ; { int gray(), invgray() ; return( gray((invgray(node)+np-i)%np) ) ; } int succ ( node, i, np ) int node, i, np ; { int gray(), invgray() ; return( gray((invgray(node)+i)%np) ) ; } int gray (i) int i ; { return( (i>>1)^i ) ; } int invgray (i) int i ; { int k ; k = i ; while ( k > 0 ) { k = k>>1 ; i = i^k ; } return (i) ; } SHAR_EOF if test -f 'cholh.c' then echo shar: over-writing existing file "'cholh.c'" fi cat << \SHAR_EOF > 'cholh.c' #include "chol.h" /* Message types used (defined in globals.h): I : problem info M : map B : right hand side vector */ /* Order of matrix is n. Columns of matrix and components of rhs and solution are numbered 0, 1, ... , n-1. Nodes are numbered 0, 1, ..., np-1. */ main() /* host process */ { CHNL ci ; REAL ti[5], mn[5], mx[5] ; int np, n, whichmap, whichord, whichcomm, maxblk, blksize, i, j, type, cnt, node, pid, info[4] ; int *map ; static char *ordering[] = { "natural", "gray" } ; static char *mapping[] = { "wrap", "reflect" } ; static char *commun[] = { "bcube", "bcast", "ring" } ; ci = copen(PID) ; while (1) { printf("Enter number of processors:") ; scanf("%d", &np) ; printf("Enter order of matrix:") ; scanf("%d", &n) ; if ( np == 0 || n == 0 ) break ; if ( n < np ) break ; maxblk = n/np + ( n%np ? 1 : 0 ) ; printf("Enter wrap(0) or reflect(1) mapping:") ; scanf("%d", &whichmap) ; printf("Enter natural(0) or gray(1) ordering:") ; scanf("%d", &whichord) ; printf("Enter blocksize (1-%d):", maxblk) ; scanf("%d", &blksize) ; printf("Enter bcube(0), bcast(1), or ring(2) communication:") ; scanf("%d", &whichcomm) ; printf("Number of processors = %d, order of matrix = %d\n", np, n) ; printf("%s mapping, %s ordering, blocksize = %d, %s communication\n", mapping[whichmap], ordering[whichord], blksize, commun[whichcomm]) ; info[0] = np ; info[1] = n ; info[2] = whichord ; info[3] = whichcomm ; sendmsg ( ci, I, info, 4*sizeof(int), 0, PID ) ; map = (int *)malloc(n*sizeof(int)) ; whichmap = (whichmap<<1) + whichord ; setmap ( whichmap, map, n, np, blksize ) ; sendmsg ( ci, M, map, n*sizeof(int), 0, PID ) ; for ( j = 0 ; j < 5 ; j++ ) { mn[j] = 1.0e+20 ; mx[j] = -1.0e+20 ; } for ( i = 0 ; i < np ; i++ ) { recvmsg( ci, &type, ti, 5*sizeof(float), &cnt, &node, &pid ) ; printf("node %d, fac =%7.2f, fslv =%7.2f, bslv =%7.2f", node, (ti[1]-ti[0])/1000, (ti[2]-ti[1])/1000, (ti[3]-ti[2])/1000) ; printf(" maxerr = %e\n", ti[4] ) ; for ( j = 0 ; j < 5 ; j++ ) { mn[j] = ti[j] < mn[j] ? ti[j] : mn[j] ; mx[j] = ti[j] > mx[j] ? ti[j] : mx[j] ; } } printf("totals , fac =%7.2f, fslv =%7.2f, bslv =%7.2f", (mx[1]-mn[0])/1000, (mx[2]-mn[1])/1000, (mx[3]-mn[2])/1000) ; printf(" maxerr = %e\n", mx[4] ) ; free(map) ; } cclose(ci) ; } setmap ( whichmap, map, n, np, blksize ) int whichmap, *map, n, np, blksize; { int i, gray(); switch (whichmap) { case 0: /* wrap */ for ( i = 0 ; i < n ; i++ ) map[i] = (i/blksize)%np; break; case 1: /* gray wrap */ for ( i = 0 ; i < n ; i++ ) map[i] = gray((i/blksize)%np); break; case 2: /* reflect */ for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2 ? np-1 - (i/blksize)%np : (i/blksize)%np; break; case 3: /* gray reflect */ for ( i = 0 ; i < n ; i++ ) map[i] = ((i/blksize)/np)%2 ? gray(np-1 - (i/blksize)%np) : gray((i/blksize)%np); break; } } int gray (i) int i ; { return( (i>>1)^i ) ; } SHAR_EOF if test -f 'choln.c' then echo shar: over-writing existing file "'choln.c'" fi cat << \SHAR_EOF > 'choln.c' #include "chol.h" main() /* node process */ { CHNL ci ; long clock() ; REAL ti[5], maxerr ; int n, i, j, k, maxlth, ncols, np, me, cnt, node, pid, whichord, whichcomm, info[4] ; int *map, *collth, *mycols ; REAL *b, **col, *colk, *p, *q, *y ; REAL x, t ; me = mynode() ; ci = copen(PID) ; while (1) { /* receive problem data */ recvw ( ci, I, info, 4*sizeof(int), &cnt, &node, &pid ) ; np = info[0] ; n = info[1] ; whichord = info[2] ; whichcomm = info[3] ; if ( me == 0 ) for ( i = 1 ; i < np ; i++ ) send ( ci, I, info, 4*sizeof(int), i, PID ) ; if ( np == 0 || n == 0 ) break ; maxlth = n*sizeof(REAL) ; map = (int *)malloc(n*sizeof(int)) ; recvw ( ci, M, map, n*sizeof(int), &cnt, &node, &pid ) ; if ( me == 0 ) for ( i = 1 ; i < np ; i++ ) send ( ci, M, map, n*sizeof(int), i, PID ) ; ncols = 0 ; for ( i = 0 ; i < n; i++ ) if ( map[i] == me ) ncols++ ; /* allocate storage */ collth = (int *)malloc(ncols*sizeof(int)) ; mycols = (int *)malloc(ncols*sizeof(int)) ; b = (REAL *)malloc(n*sizeof(REAL)) ; col = (REAL * *)malloc(ncols*sizeof(REAL *)) ; colk = (REAL *)malloc(n*sizeof(REAL)) ; y = (REAL *)malloc(ncols*sizeof(REAL)) ; /* set up data structures for problem to be solved */ j = 0 ; for ( k = 0 ; k < n ; k++ ) if ( map[k] == me ) { p = (REAL *)malloc((n-k)*sizeof(REAL)) ; col[j] = p ; mycols[j] = k ; collth[j] = n-k ; j++ ; *p++ = 2 ; if ( n-k > 1 ) *p++ = -1 ; for ( i = 2 ; i < n-k ; i++ ) *p++ = 0 ; } for ( i = 0 ; i < n ; i++ ) b[i] = 0 ; b[n-1] = n+1 ; maxerr = 0 ; ti[0] = clock(0) ; /* Cholesky factorization */ j = 0 ; if ( map[0] == me ) { t = 1.0/sqrt(*col[j]) ; for ( p = col[j] ; p < col[j] + collth[j] ; p++ ) *p *= t ; j++ ; comm ( ci, 0, col[0], n*sizeof(REAL), map[0], np, FORWARD, whichord, whichcomm ) ; } for ( k = 0 ; k < n ; k++ ) { recvw( ci, k, colk, maxlth, &cnt, &node, &pid ) ; if ( whichcomm != 1 && me != map[k] ) comm ( ci, k, colk, cnt, map[k], np, FORWARD, whichord, whichcomm ) ; for ( i = j ; i < ncols ; i++ ) { for ( p = col[i] , q = colk + mycols[i]-k , t = *q ; p < col[i] + collth[i] ; p++ , q++ ) *p -= *q * t ; if ( k == mycols[j]-1 ) { t = 1.0/sqrt(*col[j]) ; for ( p = col[j] ; p < col[j] + collth[j] ; p++ ) *p *= t ; comm ( ci, mycols[j], col[j], collth[j]*sizeof(REAL), me, np, FORWARD, whichord, whichcomm ) ; j++ ; } } } ti[1] = clock(0) ; /* forward substitution */ j = 0 ; for ( k = 0 ; k < n ; k++ ) { if ( map[k] == me ) { if ( k > 0 ) recvw ( ci, B, b, maxlth, &cnt, &node, &pid ) ; y[j] = b[k] / *col[j] ; for ( i = k+1 ; i < n ; i++ ) b[i] -= y[j] * *(col[j]+i-k) ; if ( k < n-1 ) send ( ci, B, b, n*sizeof(REAL), map[k+1], PID ) ; j++ ; } } ti[2] = clock(0) ; /* back substitution */ j = ncols-1 ; if ( map[n-1] == me ) { x = y[j] / *col[j] ; if ( maxerr < fabs(x-n) ) maxerr = fabs(x-n) ; j-- ; comm ( ci, n-1, &x, sizeof(REAL), map[n-1], np, BACKWARD, whichord, whichcomm ) ; } for ( k = n-1 ; k >= 0 ; k-- ) { recvw( ci, k, &x, sizeof(REAL), &cnt, &node, &pid ) ; if ( whichcomm != 1 && me != map[k] ) comm ( ci, k, &x, cnt, map[k], np, BACKWARD, whichord, whichcomm ) ; for ( i = j ; i >= 0 ; i-- ) { y[i] -= x * *(col[i]+k-mycols[i]) ; if ( k == mycols[j]+1 ) { x = y[j] / *col[j] ; if ( maxerr < fabs(x-k) ) maxerr = fabs(x-k) ; comm ( ci, mycols[j], &x, sizeof(REAL), me, np, BACKWARD, whichord, whichcomm ) ; j-- ; } } } ti[3] = clock(0) ; ti[4] = maxerr ; send ( ci, 0, ti, 5*sizeof(float), HOST, PID ) ; /* free storage */ for ( j = 0 ; j < ncols ; j++ ) free(col[j]) ; free(y) ; free(colk) ; free(col) ; free(b) ; free(mycols) ; free(collth) ; free(map) ; } cclose(ci) ; } comm ( ci, msgtype, vec, bytes, root, np, dir, ord, which ) CHNL ci ; int msgtype, bytes, root, np, dir, ord, which ; char *vec ; /* * Communication driver */ { switch (which) { case 0: bcube( ci, msgtype, vec, bytes, root, np ) ; break ; case 1: bcast( ci, msgtype, vec, bytes, root, np, dir, ord ) ; break ; case 2: ring( ci, msgtype, vec, bytes, root, np, dir ) ; break ; } } bcube ( ci, msgtype, vec, bytes, root, np ) CHNL ci ; int msgtype, bytes, root, np ; char *vec ; /* * Broadcast vector, vec, of length bytes * to all processors using a minimum spanning * tree with given root. */ { int i, me, cnt, node, pid ; me = mynode()^root ; for ( i = np/2 ; i > me ; i /= 2 ) send ( ci, msgtype, vec, bytes, (me+i)^root, PID ) ; if ( me == 0 ) send ( ci, msgtype, vec, bytes, root, PID ) ; } bcast ( ci, msgtype, vec, bytes, root, np, dir, ord ) CHNL ci ; int msgtype, bytes, root, np, dir, ord ; char *vec ; /* * Broadcast vector, vec, of length bytes */ { int i, cnt, node, pid, gray(), invgray() ; switch (ord) { case NATURAL: if ( dir == FORWARD ) { for ( i = 0 ; i < np ; i++ ) send ( ci, msgtype, vec, bytes, (root+i)%np, PID ) ; } else { for ( i = 0 ; i < np ; i++ ) send ( ci, msgtype, vec, bytes, (root+np-i)%np, PID ) ; } break ; case GRAY: if ( dir == FORWARD ) { for ( i = 0 ; i < np ; i++ ) { send ( ci, msgtype, vec, bytes, succ(root,i,np), PID ) ; } } else { for ( i = 0 ; i < np ; i++ ) send ( ci, msgtype, vec, bytes, pred(root,i,np), PID ) ; } break ; } } ring ( ci, msgtype, vec, bytes, root, np, dir ) CHNL ci ; int msgtype, bytes, root, np, dir ; char *vec ; /* * Send vector, vec, of length bytes to * all processors, using an embedded ring * beginning at root. */ { int me, next, cnt, node, pid, pred(), succ() ; me = mynode() ; switch (dir) { case FORWARD: send ( ci, msgtype, vec, bytes, succ(me,1,np), PID ) ; break ; case BACKWARD: send ( ci, msgtype, vec, bytes, pred(me,1,np), PID ) ; break ; } } int pred ( node, i, np ) int node, i, np ; { int gray(), invgray() ; return( gray((invgray(node)+np-i)%np) ) ; } int succ ( node, i, np ) int node, i, np ; { int gray(), invgray() ; return( gray((invgray(node)+i)%np) ) ; } int gray (i) int i ; { return( (i>>1)^i ) ; } int invgray (i) int i ; { int k ; k = i ; while ( k > 0 ) { k = k>>1 ; i = i^k ; } return (i) ; } SHAR_EOF if test -f 'echoh.c' then echo shar: over-writing existing file "'echoh.c'" fi cat << \SHAR_EOF > 'echoh.c' /* echo.c * echo test to measure transmission delays * -r rate. -d cubedim -s startup -n repetition -l msglth -p packetsize */ #include #define PID 11 #define CDIM 6 #define MSGSIZE 15000 #define REP 10 long times(); long ttemp[4]; struct PARM { long Reps; long Msglth; long Cdim; }; #define reps parms.Reps #define msglth parms.Msglth #define cdim parms.Cdim float atof(); char buff[MSGSIZE]; float total; float totals[CDIM+1]; main(argc,argv) int argc; char *argv[]; { int i; int d; long tmp; long end, start; float rate = 1.0; int startup =0; int psize =1; struct PARM parms; msglth = 1024; cdim = cubedim(); reps = REP; while (--argc > 0 && **++argv == '-')switch ((*argv)[1]) { case 'r': /* rate */ rate = atof(argv[1]); argc--; argv++; break; case 'n': /* reps */ reps = atoi(argv[1]); argc--; argv++; break; case 's': /* startup */ startup = atoi(argv[1]); argc--; argv++; break; case 'l': /* msg lth */ msglth = atoi(argv[1]); argc--; argv++; break; case 'p': /* packet size */ psize = atoi(argv[1]); argc--; argv++; break; case 'd': /* cube dimension*/ cdim = atoi(argv[1]); argc--; argv++; break; default: break; } d = copen(PID); while(1){ printf("reps lth"); scanf("%d%d",&reps, &msglth); printf(" repetition(n) %ld cube dimension(d) %ld msg lth(l) %ld\n", reps,cdim,msglth); /* now start node tests */ sendmsg(d,1,&parms,sizeof(struct PARM),0,PID); recvmsg(d,&tmp,totals,sizeof(totals),&tmp,&tmp,&tmp); for (i=0;i 'echon.c' /* echon.c run with echoh to do roundtrip timing tests */ #define PID 11 #define HOST 0x08000 #define CDIM 6 #define MSGSIZE 15000 struct PARM { long Reps; long Msglth; long Cdim; }; #define reps parms.Reps #define msglth parms.Msglth #define cdim parms.Cdim char buff[MSGSIZE]; long clock(); main() { /* do node to node tests */ long tmp; int d; int lth; int srcnode, srcpid; long start; int i, dim, n; float totals[CDIM+1]; struct PARM parms; d = copen(PID); if (mynode() == 0) while(1){ /* wait for host to say we can start */ recvw(d,1,&parms,sizeof(struct PARM),&tmp,&srcnode,&srcpid); n=0; for(dim=0;dim 'iphfv2.f' c iphf.f host implicit integer (a-z) c do simple inner product with snd rcv integer matrix(4,4),vector(4) integer result(4) data matrix /1,2,3,4,2,3,1,0,3,3,1,2,4,3,2,1/ data vector /2,3,1,4/ call setpid(0) do 10 i=1,4 call csend(1,matrix(1,i),16,i-1,0) call csend(2,vector,16,i-1,0) 10 continue do 20 i=1,4 call crecv(-1,val,4) node = infonode() result(node+1) = val 20 continue write(*,*)result end SHAR_EOF if test -f 'iph.c' then echo shar: over-writing existing file "'iph.c'" fi cat << \SHAR_EOF > 'iph.c' /* iph.c vector matrix(transpose) inner product using messages */ #include #define DIM 4 int vector[DIM] = {2,3,1,4}; int matrix[DIM][DIM] = { 1,2,3,4, 2,3,1,0, 3,3,1,2, 4,3,2,1 }; int result[DIM]; main() { /* main task */ int d; int i; int val; int type,lth,node,pid; d = copen(15); for (i=0;i 'iphg.c' /* iph.c vector matrix(transpose) inner product using messages */ #include #define DIM 4 int vector[DIM] = {2,3,1,4}; int matrix[DIM][DIM] = { 1,2,3,4, 2,3,1,0, 3,3,1,2, 4,3,2,1 }; int result[DIM]; main() { /* main task */ int d; int i; int val; int type,lth,node,pid; d = copen(15); /* start and send data to each node */ sendmsg(d,1,matrix[i],sizeof(matrix[i]),-1,15); /* global send */ sendmsg(d,2,vector,sizeof(vector),-1,15); i=DIM; while(i--){ /* wait for results */ recvmsg(d,&type,&val,sizeof(int),<h,&node,&pid); result[node] = val; } syslog(3,"a host message"); for(i=0;i 'ipn.c' #define DIM 5 main() { /* do inner product of two vectors */ int v1[DIM], v2[DIM]; int i, sum; int d; int me; int node,pid,lth; me=mynode(); d = copen(15); recvw(d,1,v1,sizeof v1,<h,&node,&pid); recvw(d,2,v2,sizeof v2,<h,&node,&pid); sum=0; for(i=0;i< (lth/ sizeof(int));i++) sum += v1[i] * v2[i]; sendw(d,3,&sum,sizeof(int),node,pid); } SHAR_EOF if test -f 'nstats.c' then echo shar: over-writing existing file "'nstats.c'" fi cat << \SHAR_EOF > 'nstats.c' /* * nstats.c get data on node performance and msg stats * data may be piped in and through * nstats tracefile * ...| nstats * ...| nstats -outfile | . . . */ #include #include #define MAXTASKS 1000 #define RUN 1 struct NDATA { long start; long end; long idle; long tmptime; long busy; long state; long sndcnt; long rcvcnt; long rcvwcnt; } node[MAXTASKS]; float util; float totalu, totaluh; long totalsnd, totalrcv; #define MPHOST -32768 #define VHOST 32768 long ndflag; long atol(); #define MAXLINE 1000 char linestr[MAXLINE]; /* hold a line read from the trace file. */ struct Tdata { long type; long Qd; long tid; long clock; } t; char *key_words[] = { "strace","tfork","texit","send","recv","recvw", "tid","waking","cnt","clock","blocking","running", "etrace","node","mpsim","to","lth", 0}; #define STRACE 0 #define TFORK 1 #define TEXIT 2 #define SEND 3 #define RECV 4 #define RECVW 5 #define TID 6 #define WAKING 7 #define CNT 8 #define CLOCK 9 #define QD 10 #define RUNNING 11 #define ETRACE 12 #define NODE 13 #define MPSIM 14 #define TO 15 #define LTH 16 char *getword(), *nxtword(); long maxtasks; long piping; FILE *out; #define LTHS 20 long lthcnt[LTHS],hopcnt[LTHS]; int lthdn[]={8,16,32,64,128,256,512,1024,2048,4096,8192,16000,0}; int length,to,from; float lthvol[LTHS],totvol,totcnt,hopvol[LTHS]; main(argc, argv) int argc; char *argv[]; { FILE *tfptr; /* Stream pointer for the trace file. */ long i; long duration; float totalb; /* total busy */ long endtime, starttime; endtime = 0; starttime = 1<<30; totalb =0; out = stdout; if(argc != 2) tfptr = stdin; else if (*argv[1] == '-') { out = fopen(argv[1] + 1,"a"); piping = 1; tfptr = stdin; } else if((tfptr = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Can't open %s for reading\n", argv[1]); exit(1); } while(fgets(linestr, MAXLINE, tfptr) != NULL) { if (piping) printf("%s",linestr); parse(linestr); switch(t.type){ case TEXIT: node[t.tid].end = t.clock; node[t.tid].busy += t.clock - node[t.tid].tmptime; break; case SEND: node[t.tid].sndcnt++; totcnt++; totvol += length; for(i=0;i< LTHS;i++) if(length < lthdn[i]){ lthcnt[i]++; lthvol[i] += length; break; } i= distance(to,from); hopcnt[i]++; hopvol[i] += length; break; case RECV: case RECVW: node[t.tid].rcvcnt++; break; default: break; } } fprintf(out,"node start end duration busy utiliz sends recvs\n"); for(i=0;i endtime) endtime=node[i].end; if (node[i].start < starttime) starttime = node[i].start; duration = node[i].end-node[i].start; if (duration==0) duration =1; /*clock wasn't running*/ util = 100 * ((float)node[i].busy)/duration; if (i==0)fprintf(out,"HOST"); else fprintf(out,"%4ld",i-1); fprintf(out," %10ld %10ld %10ld %10ld %3.0f%% %7ld %7ld\n", node[i].start,node[i].end,duration, node[i].busy,util,node[i].sndcnt,node[i].rcvcnt); if (i != 0) totalu += util; totaluh += util; totalsnd += node[i].sndcnt; totalrcv += node[i].rcvcnt; totalb += node[i].busy; } fprintf(out, "\nNodal utilization %3.0f%% Nodal+host utilization %3.0f%% sends %ld recvs %ld\n", totalu/maxtasks,totaluh/(maxtasks+1),totalsnd,totalrcv); fprintf(out,"Gross utilization %3.0f%%\n", 100.*totalb/((endtime-starttime)*(maxtasks+1))); printf("\nTotal messages %7.0f %7.0f bytes\n",totcnt,totvol); printf("lth count bytes\n"); for (i=0;lthdn[i] >0 ;i++) printf("%5d %7ld %3.0f%% %7.0f %3.0f%%\n", lthdn[i],lthcnt[i],100.*lthcnt[i]/totcnt,lthvol[i], 100.*lthvol[i]/totvol); printf("\nhops count bytes\n"); for(i=0;i<8;i++) printf("%3ld %7ld %3.0f%% %7.0f %3.0f%%\n", i-1,hopcnt[i],100.*hopcnt[i]/totcnt, hopvol[i],100.*hopvol[i]/totvol); } distance(from,to) int from,to; { /* calculate hops */ int i,cnt; if (from == -1 || to == -1) return(0); /*host*/ i = from^to; cnt=0; while(i){ if (i & 1) cnt++; i = i>>1;} return(cnt+1); } parse(l) char *l; { /* parse a line l */ char *p, *w; long i; long tmp; t.type = -1; t.Qd = 0; p = getword(l); /* get first word */ while(*p){ for(i=0;(w = key_words[i]) != NULL;i++) if (!strcmp(w,p)) switch(i){ case STRACE: case TEXIT: case TFORK: case RECV: case RECVW: case CNT: case SEND: t.type = i; break; case MPSIM: ndflag=1; return; case QD: node[t.tid].busy += t.clock - node[t.tid].tmptime; node[t.tid].tmptime = t.clock; node[t.tid].state = !RUN; break; case NODE: ndflag=1; case TID: from = t.tid = atol(nxtword()); if (ndflag) t.tid = (t.tid== MPHOST || t.tid == VHOST) ? 0 : t.tid + 1 ; if (t.tid >maxtasks) maxtasks = t.tid; if (ndflag) from = (from == MPHOST || from == VHOST) ? -1 : from; else from--; break; case CLOCK: t.clock = atol(nxtword()); break; case WAKING: while(*(p=nxtword())) { tmp = atol(p); if (ndflag) tmp = (tmp== MPHOST || tmp == VHOST) ? 0 : tmp + 1 ; if (node[tmp].start ==0) node[tmp].start=t.clock; else node[tmp].idle += t.clock - node[tmp].tmptime; node[tmp].tmptime = t.clock; node[tmp].state = RUN; } break; case RUNNING: while(*(p=nxtword())) { tmp = atol(p); if (ndflag) tmp = (tmp== MPHOST || tmp == VHOST) ? 0 : tmp + 1 ; node[tmp].start= node[tmp].tmptime = t.clock; node[tmp].state = RUN; } break; case TO: to = atoi(nxtword()); if (to < 0) { to=from; break;} /*global send */ if (ndflag)to = (to == MPHOST || to == VHOST) ? -1 : to; else to--; break; case LTH: length = atoi(nxtword()); break; default: break; } p = nxtword(); } } static char *nxtwdp, word_str[100]; /*locals for getword and nxtword*/ char *getword(s) char *s; { char *p; while( *s && isspace(*s))s++; p=word_str; while(*s && !isspace(*s)) *p++ = *s++; *p=0; nxtwdp=s; return(word_str); } char *nxtword() { char *p; while(*nxtwdp && isspace(*nxtwdp)) nxtwdp++; p=word_str; while(*nxtwdp && !isspace(*nxtwdp)) *p++ = *nxtwdp++; *p=0; return(word_str); } SHAR_EOF if test -f 'ringhv1.f' then echo shar: over-writing existing file "'ringhv1.f'" fi cat << \SHAR_EOF > 'ringhv1.f' c ringh.f host pgm to drive rings -- pass np, revs, and lth implicit integer (a-z) integer copen,d integer cmds(3) integer*4 result d = copen(1) 10 write(6,55) 55 format(' number of nodes ',$) read(5,56)cmds(1) 56 format(i5) write(6,57) 57 format(' revolutions ',$) read(5,56)cmds(2) write(6,58) 58 format(' length ',$) read(5,56)cmds(3) call sendmsg(d,1,cmds,12,0,1) itype = 1 call recvmsg(d,itype,result,4,lth,node,pid) write(6,99)result,result/cmds(2), result/cmds(2)/cmds(1) 99 format(' time ',i7,' ms time/rev. ',i7,' ms avrg nn ',i7) go to 10 end SHAR_EOF if test -f 'iphv2.c' then echo shar: over-writing existing file "'iphv2.c'" fi cat << \SHAR_EOF > 'iphv2.c' /* iph.c vector matrix(transpose) inner product using messages */ #include #define PID 0 #define DIM 4 int vector[DIM] = {2,3,1,4}; int matrix[DIM][DIM] = { 1,2,3,4, 2,3,1,0, 3,3,1,2, 4,3,2,1 }; int result[DIM]; main() { /* main task */ int d; int i; int val; int type,lth,node,pid; setpid(PID); for (i=0;i 'ipnfv2.f' implicit integer (a-z) c multiply two vectors integer v1(4), v2(4) call crecv(1,v1,20) call crecv(2,v2,20) lth = infocount() node = infonode() pid = infopid() sum = 0 do 10 i=1,4 10 sum = sum + v1(i) * v2(i) call csend(3,sum,4,node,pid) end SHAR_EOF if test -f 'ringnv1.f' then echo shar: over-writing existing file "'ringnv1.f'" fi cat << \SHAR_EOF > 'ringnv1.f' c ringn.f fortran ring c host passes node 0 number of nodes in ring and lth and revolutions program ringn integer*4 clock,start,tim integer bsize parameter (bsize=3000) integer*4 msg(bsize) integer cmds(3) integer copen,ci integer cnt, next,me, mynode, gray,invgray integer node,pid,host ci = copen(1) me = mynode() cnt = 0 10 if (me .eq. 0 .and. cnt .eq. 0) then call recvw(ci,1,cmds,12,l,host,pid) np = cmds(1) msg(1) = np cnt = cmds(2) next = gray(mod(invgray(me)+1,np)) start = clock() call sendw(ci,1,msg,cmds(3),next,1) endif call recvw(ci,1,msg,4*bsize,lth,node,pid) np = msg(1) next = gray(mod(invgray(me)+1,np)) cnt = cnt -1 if (me .eq. 0 .and. cnt .eq. 0) then tim = clock() - start call sendw(ci,1,tim,4,host,1) else call sendw(ci,1,msg,lth,next,1) endif go to 10 end c gray and invgray -- uses shift and xor integer function gray(mm) integer mm,rshift,xor gray = xor(rshift(mm,1),mm) end integer function invgray(mm) integer mm,m,xor,rshift m=mm invgray = 0 10 if (m .eq. 0) return invgray = xor(invgray,m) m = rshift(m,1) go to 10 end integer function lshift( mm, nn ) integer mm,nn,i,m c c left shift mm by nn places c m=mm do 10 i=1,nn m = m*2 10 continue lshift = m end integer function rshift( mm, nn ) integer mm,nn,i,m c c right shift mm by nn places c m=mm do 10 i=1,nn m = m/2 10 continue rshift = m end integer function xor(mm,nn) integer mm,nn c c exclusive or c integer b,x,m,n m = mm n = nn x = 0 b = 1 10 if (m .eq. n ) go to 20 if (mod(m,2) .ne. mod(n,2)) x = x + b b = 2*b m = m/2 n = n/2 go to 10 20 xor = x end SHAR_EOF if test -f 'sbld' then echo shar: over-writing existing file "'sbld'" fi cat << \SHAR_EOF > 'sbld' cc -o $1 $1.c src/simlib.a -lm SHAR_EOF chmod +x 'sbld' if test -f 'sfbld' then echo shar: over-writing existing file "'sfbld'" fi cat << \SHAR_EOF > 'sfbld' f77 -o $1 $1.f src/simlib.a SHAR_EOF chmod +x 'sfbld' if test -f 'iphf.f' then echo shar: over-writing existing file "'iphf.f'" fi cat << \SHAR_EOF > 'iphf.f' c iphf.f host implicit integer (a-z) c do simple inner product with snd rcv integer matrix(4,4),vector(4) integer result(4) data matrix /1,2,3,4,2,3,1,0,3,3,1,2,4,3,2,1/ data vector /2,3,1,4/ d = copen(15) do 10 i=1,4 call sendmsg(d,1,matrix(1,i),16,i-1,15) call sendmsg(d,2,vector,16,i-1,15) 10 continue do 20 i=1,4 call recvmsg(d,itype,val,4,lth,node,pid) result(node+1) = val 20 continue write(*,*)result end SHAR_EOF if test -f 'ipnf.f' then echo shar: over-writing existing file "'ipnf.f'" fi cat << \SHAR_EOF > 'ipnf.f' implicit integer (a-z) c multiply two vectors integer v1(5), v2(5) d = copen(15) call recvw(d,1,v1,20,lth,node,pid) call recvw(d,2,v2,20,lth,node,pid) sum = 0 do 10 i=1,5 10 sum = sum + v1(i) * v2(i) call sendw(d,3,sum,4,node,pid) end SHAR_EOF if test -f 'ringh.f' then echo shar: over-writing existing file "'ringh.f'" fi cat << \SHAR_EOF > 'ringh.f' c ringh.f host pgm to drive rings -- pass np, revs, and lth implicit integer (a-z) integer cmds(3) integer*4 result call setpid(0) 10 write(6,55) 55 format('number of nodes ') read(5,56)cmds(1) 56 format(i5) write(6,57) 57 format(' revolutions ',$) read(5,56)cmds(2) write(6,58) 58 format(' length ',$) read(5,56)cmds(3) call csend(1,cmds,12,0,0) call crecv(-1,result,4) write(6,99)result,result/cmds(2), result/cmds(2)/cmds(1) 99 format(' time ',i7,' ms time/rev. ',i7,' ms avrg nn ',i7) go to 10 end SHAR_EOF if test -f 'ringn.f' then echo shar: over-writing existing file "'ringn.f'" fi cat << \SHAR_EOF > 'ringn.f' c ringn.f fortran ring c host passes node 0 number of nodes in ring and lth and revolutions program ringn integer*4 mclock,start,tim integer bsize parameter (bsize=3000) integer*4 msg(bsize) integer cmds(3) integer cnt, next,me, mynode, gray,invgray me = mynode() np = numnodes() next = gray(mod(invgray(me)+1,np)) cnt = 0 10 if (me .eq. 0 .and. cnt .eq. 0) then call crecv(1,cmds,12) np = cmds(1) msg(1) = np cnt = cmds(2) start = mclock() call csend(1,msg,cmds(3),next,0) endif call crecv(1,msg,4*bsize) lth = infocount() cnt = cnt -1 if (me .eq. 0 .and. cnt .eq. 0) then tim = mclock() - start call csend(1,tim,4,myhost(),0) else call csend(1,msg,lth,next,0) endif go to 10 end c gray and invgray -- uses shift and xor integer function gray(mm) integer mm,rshift,xor gray = xor(rshift(mm,1),mm) end integer function invgray(mm) integer mm,m,xor,rshift m=mm invgray = 0 10 if (m .eq. 0) return invgray = xor(invgray,m) m = rshift(m,1) go to 10 end integer function lshift( mm, nn ) integer mm,nn,i,m c c left shift mm by nn places c m=mm do 10 i=1,nn m = m*2 10 continue lshift = m end integer function rshift( mm, nn ) integer mm,nn,i,m c c right shift mm by nn places c m=mm do 10 i=1,nn m = m/2 10 continue rshift = m end integer function xor(mm,nn) integer mm,nn c c exclusive or c integer b,x,m,n m = mm n = nn x = 0 b = 1 10 if (m .eq. n ) go to 20 if (mod(m,2) .ne. mod(n,2)) x = x + b b = 2*b m = m/2 n = n/2 go to 10 20 xor = x end SHAR_EOF if test -f 'ipnv2.c' then echo shar: over-writing existing file "'ipnv2.c'" fi cat << \SHAR_EOF > 'ipnv2.c' #define DIM 4 main() { /* do inner product of two vectors */ int v1[DIM], v2[DIM]; int i, sum; int d; int me; int node,pid,lth; me=mynode(); crecv(1,v1,sizeof v1); crecv(2,v2,sizeof v2); lth = infocount(); node = infonode(); pid = infopid(); sum=0; for(i=0;i< (lth/ sizeof(int));i++) sum += v1[i] * v2[i]; csend(3,&sum,sizeof(int),node,pid); } SHAR_EOF if test -f 'chol.h' then echo shar: over-writing existing file "'chol.h'" fi cat << \SHAR_EOF > 'chol.h' #include #include #ifndef HOST #define HOST myhost() #endif #define PID 0 #define FORWARD 1 #define BACKWARD 0 #define NATURAL 0 #define GRAY 1 #define I 32000 #define M 32001 #define B 32002 typedef float REAL ; typedef int SHORT ; typedef int LONG ; typedef int CHNL ; char *malloc() ; SHAR_EOF if test -f 'README' then echo shar: over-writing existing file "'README'" fi cat << \SHAR_EOF > 'README' mpsim portable hypercube simulator Dunigan ORNL dunigan@msr.epm.ornl.gov MPSIM Portable (to most UNIX systems) message-passing simulator that supports both C and FORTRAN. Uses forks and pipes to support up to 8 to 16 processes on UNIX BSD 4.x, SYS V 3.x, DYNIX, Encore, Ultrix, Sun, XENIX, Tek UNIX, 3B2s. Simulates Intel iPSC/1 and iPSC/2 hypercubes and produces trace file of message events. Trace-file analyzers produces tabular or graphical summaries. (Encore and Sequent versions will utilize multiple processors.) top directory contains sample programs and trace file postprocessors (nstats.c, ccplot.c, trace1.c) src/ the beef, do a make here to create mpsim and simlib.a mpsim.man and makefiles; also sample build scripts (sbld, sfbld) --------------------------------------------------------- Information on other ORNL parallel processor simulators ORNL parallel processor simulators dunigan last change: 8/3/88 directories: (source distribution size) mpsim/ portable simulator, sample programs (122KB) src/ mpsim source smpsim/ sequent version (NS32x32) of mpsim using shared memory (147KB) sysVsmpsim/ beta version of mpsim using System V shared memory (62KB) ppsim/ interpretive simulator (2.8MB) doc/ man entries and such postp/ postprocessor of trace files samples/ sample codes lll/ original LLNL distribution seqdn/ version of simulators for Sequent (32032 CPUs only) kernel/ library source, aspp source local/ sample bld and makefile for applications sundn/ version of simulators for Sun workstations (Sun 3/68881 only) kernel/ library source, aspp source local/ sample bld and makefile for applications vaxdn/ version of simulators for VAX UNIX 4.x/Ultrix kernel/ library source, aspp source local/ sample bld and makefile for applications Simulator summary: We have several hypercube simulators and a simulator for shared-memory parallel processors; all support C and Fortran. A brief summary: Name Runs on Granularity Performance Debugging Info Help ppsim Vax, Sequent, Sun fine yes yes mpsim Any Unix with pipes coarse no yes smpsim Sequent coarse no yes PPSIM Interpretive parallel processor simulator, based on "Multitasker" by Eugene Brooks, modified to support message-passing parallel processors. Provides instruction level tracing, program tunable message-passing parameters (startup time, node-to-node rates, host-to-node rates, packet size). Trace file post-processing programs provide concurrency plots and tabular summary of message passing behavior. Supports C and f77 on UNIX 4.x on VAX, Sun 3 (68020/68881), or Sequent (32032) processors. Message-passing software emulates Intel iPSC/1 hypercube subroutines. Shared-memory simulation supports, locks, events, barriers, semaphores, enqueue, and task creation. We have used this package in teaching graduate course in parallel programming. SMPSIM MPSIM converted to use shared-memory of Sequent for message passing. The above table is a bit of an over simplification, of course. ppsim gives an interpretive simulation at the level of individual instructions and has tunable performance parameters. It runs as a single process under Unix and gives meaningful, graphical performance output and is also useful for debugging. mpsim uses multiple Unix processes communicating via pipes. It is helpful for debugging but doesn't give very meaningful performance info. Its advantage over ppsim is faster execution (in terms of wall clock time). smpsim is a specialization of mpsim to the Sequent in which communication by pipes is replaced by communication via shared memory, which is much faster still. In fact smpsim simulates an iPSC in faster than real time for the same number of (real) processors. Documentation: man entries are included in the various distribution directories. Additional technical reports: "A message-passing multiprocessor simulator",Dunigan, ORNL/TM-9966 "A portable hypercube simulator", Dunigan, ORNL/TM-10410 "Denelcor HEP multiprocessor simulator, Dunigan, ORNL/TM-9971 "A multitasking kernel for the C and Fortran programming languages", Brooks, LLNL UCID-20167 Work-in-progress: dcube-- hypercube simulator using TCP/IP-connected workstations as the nodes "Hypercube Simulation on a Local Area Network", Dunigan, ORNL/TM-10685 dunigan@msr.epm.ornl.gov SHAR_EOF if test -f 'recthv2.c' then echo shar: over-writing existing file "'recthv2.c'" fi cat << \SHAR_EOF > 'recthv2.c' /* recth.c rectangel rule host */ #define PID 0 #define TYPE1 1 #define TYPE2 2 struct Result { double pi; long ticks; } result; long n; main() { int p; int c,type,tmp, i; setpid(PID); p = numnodes(); while(1){ printf("rectangles "); scanf("%ld",&n); for(i=0;i 'rectnv2.c' /* rectn.c rectangel rule node */ #define PID 0 #define TYPE1 1 #define TYPE2 2 #define TYPE3 3 #define ROOT 0 #define F(x) 4.0/(1.0 + x * x ) struct Result { double pi; long ticks; } result; long n; main() { int p,host,pid; int me,c,type,tmp; long i; double sum,x; long mclock(), s; p = numnodes(); me = mynode(); while(1){ crecv(TYPE1,&n,sizeof(n)); s = mclock(); sum = 0.0; for(i=me;i 'ccplot.c' /* * ccplot.c thd 3/22/88 * generate plot data for graph command * should really only be used with data from ppsim */ #include #define MPHOST -32768 #define VHOST 32768 long ndflag; int concurrency; float avetasks; int maxtasks; int maxtid; int maxclock; int lasty, lastx; char *strfnd(); main(argc, argv) int argc; char *argv[]; { float f, df; float util; int start,delta; int maxn; int count; int tid; int clock, oldclock; char *p; FILE *tfptr; /* Stream pointer for the trace file. */ #define MAXLINE 1000 char linestr[MAXLINE]; /* Buffer to hold a line read from the trace file. */ #define STRLEN 100 char segstr[STRLEN]; char avestr[STRLEN]; int i; if(argc != 2) tfptr = stdin; else if((tfptr = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Can't open %s for reading\n", argv[1]); exit(1); } /* We read the first line from the tracefile which must be a header. */ while(fgets(linestr, MAXLINE, tfptr) != NULL) { if(strncmp(linestr, "strace", strlen("strace")) == 0) { /* Begin a new plot. */ lasty=1; lastx=0; start= -1; oldclock = -1; } else if(strncmp(linestr, "mpsim", strlen("mpsim")) == 0) ndflag=1; else if(strncmp(linestr, "cnt", strlen("cnt")) == 0) { if(sscanf(linestr, "cnt %d clock %d", &count, &clock) != 2) { fprintf(stderr, "Failed to read count and clock from linestr:%s\n", linestr); exit(1); } if (clock == -1){ fprintf(stderr,"?ccplot: trace clock not running build in aspp mode\n"); exit(1); } if (count>maxtasks) maxtasks =count; if (clock > maxclock) maxclock = clock; if (start == -1) start=clock; if (clockmaxtid)maxtid=tid; } else if ((p=strfnd(linestr,"node "))!=NULL){ tid=atoi(p+5); if (ndflag) tid = (tid== MPHOST || tid == VHOST) ? 0 : tid + 1 ; if (tid>maxtid)maxtid=tid; } } /* while more data to read */ count=0; printf("%d %d\n%d %d \" \"\n",clock,lasty,clock,count); maxn = maxtid > maxtasks ? maxtid : maxtasks; df = maxn/10.; /* locate labels */ f = maxn+ 2*df; delta=maxclock-start; printf("%d %f \"Concurrency plot x-time y-concurrent processes\"\n", start + delta/4,f); f -= df; util = 100. * ((float)avetasks) /((maxtid+1) * delta); printf("%d %f \"Nodes %d Max concurrency %d Gross Utilization %3.0f%% \"\n", start + delta/4,f,maxtid,maxtasks,util); } char *strfnd(t,s) char *t, *s; { /* see if string s is in t */ while ((strlen(t) >= strlen(s)) && *t) if (strncmp(t++,s,strlen(s))==0)return(--t); return(NULL); } SHAR_EOF if test -f 'trace1.c' then echo shar: over-writing existing file "'trace1.c'" fi cat << \SHAR_EOF > 'trace1.c' /* trace1.c thd 3/22/88 * generate plot file that shows which nodes are running over time * really should only be used with ppsim */ #include #include #define MPHOST -32768 #define VHOST 32768 long ndflag; #define MAXTASKS 1000 int begin[MAXTASKS]; /* begin time */ #define MAXLINE 1000 char linestr[MAXLINE]; /* hold a line read from the trace file. */ struct Tdata { int type; int Qd; int tid; int clock; } t; char *key_words[] = { "strace","tfork","texit","send","recv","recvw", "tid","waking","cnt","clock","blocking","running", "mpsim","node", 0}; #define STRACE 0 #define TFORK 1 #define TEXIT 2 #define SEND 3 #define RECV 4 #define RECVW 5 #define TID 6 #define WAKING 7 #define CNT 8 #define CLOCK 9 #define QD 10 #define RUNNING 11 #define MPSIM 12 #define NODE 13 char *getword(), *nxtword(); int maxtasks; int minclock = -1; main(argc, argv) int argc; char *argv[]; { FILE *tfptr; /* Stream pointer for the trace file. */ int i; if(argc != 2) tfptr = stdin; else if((tfptr = fopen(argv[1], "r")) == NULL) { fprintf(stderr, "Can't open %s for reading\n", argv[1]); exit(1); } while(fgets(linestr, MAXLINE, tfptr) != NULL) { parse(linestr); switch(t.type){ case STRACE: if (minclock == -1) minclock = t.clock; break; case TEXIT: plot(); break; } if (t.Qd) plot(); } printf("%d %d \" \"\n %d 0\n",minclock,maxtasks+1,minclock); } parse(l) char *l; { /* parse a line l */ char *p, *w; int i; long nd; t.type = -1; t.Qd = 0; p = getword(l); /* get first word */ while(*p){ for(i=0;(w = key_words[i]) != NULL;i++) if (!strcmp(w,p)) switch(i){ case STRACE: case TEXIT: case TFORK: case RECV: case RECVW: case SEND: t.type = i; break; case QD: t.Qd++; break; case TID: case NODE: t.tid = atoi(nxtword()); if (ndflag) t.tid = (t.tid== MPHOST || t.tid == VHOST) ? 0 : t.tid + 1 ; /*adjust to ppsim style */ break; case CLOCK: t.clock = atoi(nxtword()); break; case MPSIM: ndflag=1; return; case WAKING: case RUNNING: while(*(p=nxtword())){ nd = atoi(p); if (ndflag) nd = (nd== MPHOST || nd == VHOST) ? 0 : nd + 1 ; begin[nd] = t.clock; } break; } p = nxtword(); } } plot() { float task; /* plot task duration */ task = t.tid ; if (!t.tid) task += 0.25; if (t.tid > maxtasks) maxtasks = t.tid; printf("%d %f\n%d %f\n%d %f \" \"\n",begin[t.tid],task,t.clock,task, t.clock,task + 0.25); } static char *nxtwdp, word_str[100]; /*locals for getword and nxtword*/ char *getword(s) char *s; { char *p; while( *s && isspace(*s))s++; p=word_str; while(*s && !isspace(*s)) *p++ = *s++; *p=0; nxtwdp=s; return(word_str); } char *nxtword() { char *p; while(*nxtwdp && isspace(*nxtwdp)) nxtwdp++; p=word_str; while(*nxtwdp && !isspace(*nxtwdp)) *p++ = *nxtwdp++; *p=0; return(word_str); } SHAR_EOF cd .. # End of shell archive exit 0